IVGCVSW-2093 Add SpaceToBatchNd layer and corresponding no-op factory implementations
src/backends/test/LayerTests.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "LayerTests.hpp"

#include "test/TensorHelpers.hpp"
#include "TensorCopyUtils.hpp"
#include "Permute.hpp"

#include <boost/test/unit_test.hpp>
#include <boost/assert.hpp>

#include <armnn/LayerSupport.hpp>

#include <backends/CpuTensorHandle.hpp>
#include <backends/WorkloadFactory.hpp>

#include <algorithm>
#include <boost/cast.hpp>

#include "WorkloadTestUtils.hpp"
#include "Conv2dTestImpl.hpp"
#include "BatchNormTestImpl.hpp"
#include "ActivationTestImpl.hpp"
#include "Pooling2dTestImpl.hpp"
#include "ReshapeTestImpl.hpp"
#include "FullyConnectedTestImpl.hpp"
#include "SplitterTestImpl.hpp"
#include "SoftmaxTestImpl.hpp"
#include "NormTestImpl.hpp"
#include "PermuteTestImpl.hpp"
#include "LstmTestImpl.hpp"
#include "ConvertFp16ToFp32TestImpl.hpp"
#include "ConvertFp32ToFp16TestImpl.hpp"

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<typename T>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, armnn::GetDataType<T>());
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}
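
// For reference: QuantizedVector applies the usual affine quantization, q = round(x / qScale) + qOffset,
// to quantized element types and passes values through unchanged for float types. A sketch of the
// arithmetic, assuming that rounding behaviour and the uint8 parameters used by tests further down
// (qScale = 0.5, qOffset = 50):
//     Bias2 = {0, 2}  ->  {round(0 / 0.5) + 50, round(2 / 0.5) + 50} = {50, 54}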

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                       float                    qScale,
                                                       int32_t                  qOffset,
                                                       bool                     biasEnabled,
                                                       const armnn::DataLayoutIndexed& layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        })));

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
      input,
      kernel,
      GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
      expectedOutput,
      qScale,
      qOffset,
      layout);
}

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                       float                    qScale,
                                                       int32_t                  qOffset,
                                                       bool                     biasEnabled,
                                                       const armnn::DataLayoutIndexed& layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        })));

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
      input,
      kernel,
      GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
      expectedOutput,
      qScale,
      qOffset,
      layout);
}

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                           float                    qScale,
                                                           int32_t                  qOffset,
                                                           bool                     biasEnabled,
                                                           armnn::DataLayout        dataLayout)
{
    // Use a single-batch 1-channel 4x3 image (NHWC) as input.

    armnn::TensorInfo inputDesc({1, 3, 4, 1}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
                                                      {
                                                       1, 5, 2, 3,
                                                       8, 7, 3, 6,
                                                       3, 3, 9, 1
                                                       });


    // Use 1 batch of a 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
                                                                    4, 5, 6,
                                                                    0, 0, 0,
                                                                    3, 2, 1
                                                                    });

    // Expected output is 1 batch of a 1-channel 4x3 image.
    armnn::TensorInfo outputDesc({1, 3, 4, 1}, armnn::GetDataType<T>());

    const std::vector<float> outputData =
            {
                    23, 41, 33, 21,
                    44, 65, 76, 52,
                    82, 85, 79, 42
            };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    return SimpleConvolution2dNhwcTestImpl<T>(workloadFactory,
                                              input,
                                              kernel,
                                              boost::multi_array<T, 1>(),
                                              expectedOutput,
                                              dataLayout,
                                              qScale,
                                              qOffset);
}

LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory,
                                                     bool                     biasEnabled,
                                                     const armnn::DataLayoutIndexed& layout)
{
    return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                            bool                     biasEnabled,
                                                            const armnn::DataLayoutIndexed& layout)
{
    return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled, layout);
}
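
// Note on the uint8 variant: with the affine mapping real = qScale * (q - qOffset), the
// parameters qScale = 0.5 and qOffset = 50 let uint8 represent the range
//     [0.5 * (0 - 50), 0.5 * (255 - 50)] = [-25.0, 102.5],
// which covers the expected outputs of the 3x5 test (the most negative value, -25, maps to q = 0).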

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory,
                                                     bool                     biasEnabled,
                                                     const armnn::DataLayoutIndexed& layout)
{
    return SimpleConvolution2d3x3TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(armnn::IWorkloadFactory& workloadFactory,
                                                         bool                     biasEnabled)
{
    return SimpleConvolution2d3x3NhwcTestCommon<float>(workloadFactory, 0.f, 0, biasEnabled, armnn::DataLayout::NHWC);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                            bool                     biasEnabled,
                                                            const armnn::DataLayoutIndexed& layout)
{
    return SimpleConvolution2d3x3TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled, layout);
}

template<typename T>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::DataLayoutIndexed& layout,
    float                    qScale,
    int32_t                  qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            11,21,31,
            12,22,32,
            13,23,33
        })));

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -11,-21,
            -12,-22,
        })));

// Expected output is 1 batch of a 1-channel 6x8 image.
// Manually calculated like this:
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
//[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
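// As a spot check, the first non-zero row of the expected output follows directly from the
// window sums above:
//     -22*11          = -242
//     -12*11 - 22*21  = -132 - 462 = -594
//     -12*21 - 22*31  = -252 - 682 = -934
//     -12*31          = -372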
    armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
               0,    0,      0,    0,    0,    0,
            -242,  -594,  -934, -372,    0,    0,
            -495, -1190, -1850, -725,    0,    0,
            -538, -1256, -1916, -748,    0,    0,
            -273, -626,  -946,  -363,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
      input,
      kernel,
      GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset),
      expectedOutput,
      qScale,
      qOffset,
      layout,
      1,  // Padding left.
      2,  // Padding top.
      3,  // Padding right.
      4); // Padding bottom.
}

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                                     const armnn::DataLayoutIndexed& layout,
                                                                     float qScale,
                                                                     int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        })));

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        })));

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>());
    std::vector<T> myVec(outputDesc.GetNumElements(), 0);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -7140, -10580, -13940,  -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144,  -9318, -5152,
            -5032,  -7256,  -9376,  -6142, -3368,
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
        input,
        kernel,
        GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2); // Padding bottom.
}

template<typename T>
LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                                 float qScale,
                                                                 int32_t qOffset,
                                                                 bool biasEnabled,
                                                                 const armnn::DataLayoutIndexed& layout)
{
    // Use a single-batch 2-channel 5x5 image as input.
    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>());
    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
             0,  1,  2,  3,  4,
             5,  6,  7,  8,  9,
            10, 11, 12, 13, 14,
            15, 16, 17, 18, 19,
            20, 21, 22, 23, 24,

            25, 26, 27, 28, 29,
            30, 31, 32, 33, 34,
            35, 36, 37, 38, 39,
            40, 41, 42, 43, 44,
            45, 46, 47, 48, 49
        })));

    // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>());
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
            32, 31, 30, 29,
            28, 27, 26, 25,
            24, 23, 22, 21,
            20, 19, 18, 17,

            16, 15, 14, 13,
            12, 11, 10,  9,
             8,  7,  6,  5,
             4,  3,  2,  1
        })));
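
    // With a depth multiplier of 1 (the first kernel dimension) and 2 input channels, the
    // depthwise convolution produces 1 * 2 = 2 output channels, matching the output shape below.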

    // Expected output is 1 batch of a 2-channel 5x5 image.
    // Calculated using the python tensorflow library with strideX=1, strideY=1.
    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
            1062, 1580, 1850, 1530, 1117,
            2140, 3108, 3500, 2842, 2042,
            3580, 5068, 5460, 4342, 3062,
            3618, 5072, 5390, 4248, 2971,
            3074, 4282, 4510, 3533, 2457,
            1550, 2284, 2362, 1955, 1428,
            2910, 4206, 4342, 3528, 2536,
            3390, 4886, 5022, 4068, 2916,
            3566, 5056, 5182, 4133, 2922,
            3100, 4352, 4452, 3517, 2465
        })));

    return DepthwiseConvolution2dAsymmetricTestImpl<T>(workloadFactory,
        input,
        kernel,
        GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2,  // Padding bottom.
        1,  // strideX
        1); // strideY
}

template<typename T>
LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                           float qScale,
                                                           int32_t qOffset,
                                                           bool biasEnabled)
{
    armnn::TensorInfo inputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>());
    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
            0, 25,
            1, 26,
            2, 27,
            3, 28,
            4, 29,

            5, 30,
            6, 31,
            7, 32,
            8, 33,
            9, 34,

            10, 35,
            11, 36,
            12, 37,
            13, 38,
            14, 39,

            15, 40,
            16, 41,
            17, 42,
            18, 43,
            19, 44,

            20, 45,
            21, 46,
            22, 47,
            23, 48,
            24, 49
        })));

    armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2}, armnn::GetDataType<T>());
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
             32, 16,
             31, 15,
             30, 14,
             29, 13,

             28, 12,
             27, 11,
             26, 10,
             25,  9,

             24,  8,
             23,  7,
             22,  6,
             21,  5,

             20,  4,
             19,  3,
             18,  2,
             17,  1
        })));

    armnn::TensorInfo outputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
        1062, 1550,
        1580, 2284,
        1850, 2362,
        1530, 1955,
        1117, 1428,

        2140, 2910,
        3108, 4206,
        3500, 4342,
        2842, 3528,
        2042, 2536,

        3580, 3390,
        5068, 4886,
        5460, 5022,
        4342, 4068,
        3062, 2916,

        3618, 3566,
        5072, 5056,
        5390, 5182,
        4248, 4133,
        2971, 2922,

        3074, 3100,
        4282, 4352,
        4510, 4452,
        3533, 3517,
        2457, 2465
        })));
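
    // These are the same per-channel values as the NCHW asymmetric test above, interleaved
    // into HWC order (channel 0 and channel 1 alternate within each pixel).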

    return DepthwiseConvolution2dNhwcTestImpl<T>(workloadFactory,
        input,
        kernel,
        GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
        expectedOutput,
        qScale,
        qOffset,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2,  // Padding bottom.
        1,  // strideX
        1); // strideY
}

LayerTestResult<float, 4>
Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory,
                                                           const armnn::DataLayoutIndexed& layout)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon<float>(workloadFactory, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory,
                                                             const armnn::DataLayoutIndexed& layout)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<float>(workloadFactory, layout, 0.0f, 0);
}

LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
                                                     bool                     biasEnabled,
                                                     const armnn::DataLayoutIndexed& layout)
{
    return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(armnn::IWorkloadFactory& workloadFactory,
                                                              bool biasEnabled)
{
    return DepthwiseConvolution2dNhwcTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
                                                              bool biasEnabled,
                                                              const armnn::DataLayoutIndexed& layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory,
                                                               bool biasEnabled,
                                                               const armnn::DataLayoutIndexed& layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                            bool                     biasEnabled,
                                                            const armnn::DataLayoutIndexed& layout)
{
    return DepthwiseConvolution2dTestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                                     bool biasEnabled,
                                                                     const armnn::DataLayoutIndexed& layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
{
    return Convolution1dTestImpl<float>(workloadFactory, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
{
    return Convolution1dTestImpl<uint8_t>(workloadFactory, 0.1f, 128, biasEnabled);
}

LayerTestResult<float,4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
                                                  armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<float>(workloadFactory, refWorkloadFactory);
}

template<typename T>
LayerTestResult<T,4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
                                                       armnn::IWorkloadFactory& refWorkloadFactory,
                                                       const armnn::DataLayoutIndexed& layout)
{
    return CompareDepthwiseConvolution2dTestImpl<T>(workloadFactory, refWorkloadFactory, layout);
}

template LayerTestResult<float, 4> CompareDepthwiseConvolution2dTest<float>(
    armnn::IWorkloadFactory&, armnn::IWorkloadFactory&, const armnn::DataLayoutIndexed&);
template LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dTest<uint8_t>(
    armnn::IWorkloadFactory&, armnn::IWorkloadFactory&, const armnn::DataLayoutIndexed&);

LayerTestResult<float,4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory)
{
    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
    return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
}

LayerTestResult<float,4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory)
{
    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    auto normChannel = armnn::NormalizationAlgorithmChannel::Within;
    return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
}

LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(armnn::IWorkloadFactory& workloadFactory)
{
    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
    return SimpleNormalizationNhwcTestImpl(workloadFactory, normChannel, normMethod);
}

LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta)
{
    return SimpleSoftmaxTestImpl<float>(workloadFactory, beta);
}

LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta)
{
    return SimpleSoftmaxTestImpl<uint8_t>(workloadFactory, beta);
}

LayerTestResult<float,4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory,
                                                  armnn::IWorkloadFactory& refWorkloadFactory,
                                                  armnn::NormalizationAlgorithmChannel normChannel,
                                                  armnn::NormalizationAlgorithmMethod normMethod)
{
    return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod);
}

LayerTestResult<float,2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory,
    armnn::IWorkloadFactory& refWorkloadFactory,
    float beta)
{
    return CompareSoftmaxTestImpl<float>(workloadFactory, refWorkloadFactory, beta);
}

LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory,
    armnn::IWorkloadFactory& refWorkloadFactory,
    float beta)
{
    return CompareSoftmaxTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, beta);
}

std::vector<LayerTestResult<float,3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory)
{
    return SplitterTestCommon<float>(workloadFactory);
}

std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return SplitterTestCommon<uint8_t>(workloadFactory, 1.0f, 0);
}

LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory)
{
    return CopyViaSplitterTestImpl<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return CopyViaSplitterTestImpl<uint8_t>(workloadFactory, 1.0f, 0);
}

LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
        armnn::IWorkloadFactory& workloadFactory)
{
    armnn::TensorInfo inputDesc({ 2, 2 }, armnn::GetDataType<float>());
    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
            { 2., 3., 3., 4. }));

    armnn::TensorInfo outputDesc({ 2, 4 }, armnn::GetDataType<float>());
    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
            {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
             -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f}));
    return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput);
}
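
// Naming used by the LSTM tests: "CIFG" couples the input and forget gates (the input gate is
// derived as 1 - forget gate), "peephole" lets the gate computations read the cell state, and
// "projection" applies an extra linear layer to the LSTM output.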

LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
        armnn::IWorkloadFactory& workloadFactory)
{
    armnn::TensorInfo inputDesc({ 2, 5 }, armnn::GetDataType<float>());
    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
            {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
             0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}));

    armnn::TensorInfo outputDesc({ 2, 16 }, armnn::GetDataType<float>());
    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
            {-0.00396806f,  0.029352f,    -0.00279226f,  0.0159977f,  -0.00835576f,
             -0.0211779f,   0.0283512f,   -0.0114597f,   0.00907307f, -0.0244004f,
             -0.0152191f,  -0.0259063f,    0.00914318f,  0.00415118f,  0.017147f,
              0.0134203f,  -0.013869f,     0.0287268f,  -0.00334693f,  0.00733398f, -0.0287926f,
             -0.0186926f,   0.0193662f,   -0.0115437f,   0.00422612f, -0.0345232f,
              0.00223253f, -0.00957321f,   0.0210624f,   0.013331f,    0.0150954f,
              0.02168f}));
    return LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(workloadFactory, input, expectedOutput);
}

LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory)
{
    armnn::TensorInfo inputDesc({2, 2}, armnn::GetDataType<float>());
    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
            {2., 3., 3., 4.}));

    armnn::TensorInfo outputDesc({2, 4}, armnn::GetDataType<float>());
    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
            {-0.02973187f, 0.1229473f,  0.20885126f, -0.15358765f,
             -0.0185422f,  0.11281417f, 0.24466537f, -0.1826292f}));

    return LstmNoCifgNoPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput);
}

LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory)
{
    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Define the tensor descriptors.
    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);

    LayerTestResult<float,3> ret(outputTensorInfo);

    ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
        {
            1.0f, 2.0f, 3.0f,
            4.0f, 5.0f, 6.0f,
            7.0f, 8.0f, 9.0f,
            10.0f, 11.0f, 12.0f,
            13.0f, 14.0f, 15.0f,
            16.0f, 17.0f, 18.0f,

            19.0f, 20.0f, 21.0f,
            22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f,
            28.0f, 29.0f, 30.0f,
            31.0f, 32.0f, 33.0f,
            34.0f, 35.0f, 36.0f,

            37.0f, 38.0f, 39.0f,
            40.0f, 41.0f, 42.0f,
            43.0f, 44.0f, 45.0f,
            46.0f, 47.0f, 48.0f,
            49.0f, 50.0f, 51.0f,
            52.0f, 53.0f, 54.0f,
        })
    );

    auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
        {
            1.0f, 2.0f, 3.0f,
            4.0f, 5.0f, 6.0f,
            7.0f, 8.0f, 9.0f,
            10.0f, 11.0f, 12.0f,
            13.0f, 14.0f, 15.0f,
            16.0f, 17.0f, 18.0f,

            19.0f, 20.0f, 21.0f,
            22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f,
            28.0f, 29.0f, 30.0f,
            31.0f, 32.0f, 33.0f,
            34.0f, 35.0f, 36.0f,
        })
    );

    auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
        {
            37.0f, 38.0f, 39.0f,
            40.0f, 41.0f, 42.0f,
            43.0f, 44.0f, 45.0f,
            46.0f, 47.0f, 48.0f,
            49.0f, 50.0f, 51.0f,
            52.0f, 53.0f, 54.0f,
        })
    );

    std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0].
    armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1].
    armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
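
    // With 2 + 1 input channels concatenated into a 3-channel output, window1 places input1
    // at channel 0 and window2 places input2 at channel 2, i.e. right after input1's channels.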

    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            workloadFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            workloadFactory.CreateTensorHandle(inputTensorInfo2);

    armnn::MergerQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());

    return ret;
}

LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory)
{
    unsigned int batchSize = 2;
    unsigned int channels  = 2;
    unsigned int height    = 2;
    unsigned int width     = 3;

    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
    armnn::TensorInfo outputTensorInfo;

    unsigned int shape[] = {batchSize, channels, height, width};

    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);

    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>(
        {
            0.0f, 2.0f, 1.0f,
            0.2f, 1.0f, 2.0f,

            1.0f, 2.0f, 1.0f,
            0.2f, 1.0f, 2.0f,

            0.0f, 2.0f, 1.0f,
            4.2f, 1.0f, 2.0f,

            0.0f, 0.0f, 1.0f,
            0.2f, 1.0f, 2.0f,
        }));

    auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>(
        {
            1.0f, 2.0f, 1.0f,
            0.0f, 1.0f, 2.0f,

            1.0f, 2.0f, -2.0f,
            0.2f, 1.0f, 2.0f,

            0.0f, 2.0f, 1.0f,
            4.2f, 0.0f, -3.0f,

            0.0f, 0.0f, 1.0f,
            0.7f, 1.0f, 5.0f,
        }));

    LayerTestResult<float,4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>(
        {
            1.0f, 4.0f, 2.0f,
            0.2f, 2.0f, 4.0f,

            2.0f, 4.0f, -1.0f,
            0.4f, 2.0f, 4.0f,

            0.0f, 4.0f, 2.0f,
            8.4f, 1.0f, -1.0f,

            0.0f, 0.0f, 2.0f,
            0.9f, 2.0f, 7.0f,
        }));

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template <typename T>
LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory,
    float qScale,
    int32_t qOffset)
{
    armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType<T>());
    armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());
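
    // The two shapes broadcast elementwise: size-1 dimensions are stretched to match the
    // other operand, so {1, 3, 2, 1} + {1, 1, 2, 3} yields an output of shape {1, 3, 2, 3}.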

    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo1.SetQuantizationScale(qScale);
        inputTensorInfo1.SetQuantizationOffset(qOffset);
        inputTensorInfo2.SetQuantizationScale(qScale);
        inputTensorInfo2.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
    }

    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
        {
            0.0f,
            1.0f,

            2.0f,
            3.0f,

            4.0f,
            5.0f,
        }));

    auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
        {
            0.5f, 1.5f, 2.5f,
            3.5f, 4.5f, 5.5f,
        }));

    LayerTestResult<T,4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
        {
            0.5f, 1.5f, 2.5f,
            4.5f, 5.5f, 6.5f,

            2.5f, 3.5f, 4.5f,
            6.5f, 7.5f, 8.5f,

            4.5f, 5.5f, 6.5f,
            8.5f, 9.5f, 10.5f,
        }));

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template <typename T>
LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory,
    float qScale,
    int32_t qOffset)
{
    armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());
    armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());

    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo1.SetQuantizationScale(qScale);
        inputTensorInfo1.SetQuantizationOffset(qOffset);
        inputTensorInfo2.SetQuantizationScale(qScale);
        inputTensorInfo2.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
    }

    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
        {
             0.0f,  1.0f,  2.0f,
             3.0f,  4.0f,  5.0f,
             6.0f,  7.0f,  8.0f,
             9.0f, 10.0f, 11.0f,
            12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f,
        }));

    auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
        {
            0.5f,
        }));

    LayerTestResult<T,4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
        {
             0.5f,  1.5f,  2.5f,
             3.5f,  4.5f,  5.5f,
             6.5f,  7.5f,  8.5f,
             9.5f, 10.5f, 11.5f,
            12.5f, 13.5f, 14.5f,
            15.5f, 16.5f, 17.5f,
        }));

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcastTestImpl<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcastTestImpl<uint8_t>(workloadFactory, 2.f, 0);
}

LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcast1ElementTestImpl<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcast1ElementTestImpl<uint8_t>(workloadFactory, 0.1333333f, 128);
}

LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory,
                                             armnn::IWorkloadFactory& refWorkloadFactory)
{
    unsigned int batchSize = 4;
    unsigned int channels  = 1;
    unsigned int height    = 2;
    unsigned int width     = 3;

    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
    armnn::TensorInfo outputTensorInfo;

    unsigned int shape[] = {batchSize, channels, height, width};

    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);

    auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232);
    auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456);

    LayerTestResult<float,4> ret(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    armnn::AdditionQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get());
    SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();
    inputHandle1Ref->Allocate();
    inputHandle2Ref->Allocate();
    outputHandleRef->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();
    refWorkloadFactory.Finalize();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}

1269 namespace {
1270 template <typename T>
1271 LayerTestResult<T, 4> DivisionTestHelper(armnn::IWorkloadFactory& workloadFactory,
1272                                          const unsigned int shape0[4],
1273                                          const std::vector<T>& values0,
1274                                          float scale0,
1275                                          int32_t offset0,
1276                                          const unsigned int shape1[4],
1277                                          const std::vector<T> & values1,
1278                                          float scale1,
1279                                          int32_t offset1,
1280                                          const unsigned int outShape[4],
1281                                          const std::vector<T> & outValues,
1282                                          float outScale,
1283                                          int32_t outOffset)
1284 {
1285     auto dataType = (std::is_same<T, uint8_t>::value ?
1286                      armnn::DataType::QuantisedAsymm8 :
1287                      armnn::DataType::Float32);
1288
1289     armnn::TensorInfo inputTensorInfo0(4, shape0, dataType);
1290     armnn::TensorInfo inputTensorInfo1(4, shape1, dataType);
1291     armnn::TensorInfo outputTensorInfo(4, outShape, dataType);
1292
1293     inputTensorInfo0.SetQuantizationScale(scale0);
1294     inputTensorInfo0.SetQuantizationOffset(offset0);
1295
1296     inputTensorInfo1.SetQuantizationScale(scale1);
1297     inputTensorInfo1.SetQuantizationOffset(offset1);
1298
1299     outputTensorInfo.SetQuantizationScale(outScale);
1300     outputTensorInfo.SetQuantizationOffset(outOffset);
1301
1302     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0);
1303     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1);
1304
1305     LayerTestResult<T, 4> result(outputTensorInfo);
1306     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues);
1307
1308     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
1309     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
1310     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1311
1312     armnn::DivisionQueueDescriptor data;
1313     armnn::WorkloadInfo info;
1314     AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
1315     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
1316     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1317
1318     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDivision(data, info);
1319
1320     inputHandle0->Allocate();
1321     inputHandle1->Allocate();
1322     outputHandle->Allocate();
1323
1324     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
1325     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
1326
1327     workloadFactory.Finalize();
1328     workload->Execute();
1329
1330     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
1331
1332     return result;
1333 }
1334 } // anonymous namespace
1335
1336 LayerTestResult<float,4> DivisionByZeroTest(armnn::IWorkloadFactory& workloadFactory)
1337 {
1338     const unsigned int width = 2;
1339     const unsigned int height = 2;
1340     const unsigned int channelCount = 2;
1341     const unsigned int batchSize = 2;
1342
1343     unsigned int shape[] = { batchSize, channelCount, height, width };
1344
1345     std::vector<float> input0({
1346                                 1.f,  1.f,  1.f,  1.f,  0.f, 0.f, 0.f, 0.f,
1347                                -1.f, -1.f, -1.f, -1.f,  5.f, 5.f, 5.f, 5.f });
1348
1349     std::vector<float> input1({
1350                                0.f, 0.f, -0.f, -0.f,  0.f, 0.f, -0.f, -0.f,
1351                                0.f, 0.f, -0.f, -0.f,  5.f, 5.f,  5.f,  5.f });
1352
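    // Expected IEEE 754 results: a finite non-zero value divided by a (signed) zero gives
    // a correspondingly signed infinity, 0/0 gives NaN, and 5/5 gives 1.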
1353     std::vector<float> output({
1354                                INFINITY, INFINITY, -INFINITY, -INFINITY,  NAN, NAN, -NAN, -NAN,
1355                                -INFINITY, -INFINITY, INFINITY, INFINITY,  1, 1, 1, 1 });
1356
1357     return DivisionTestHelper<float>(workloadFactory,
1358                                      shape, input0, 1.0f, 0,
1359                                      shape, input1, 1.0f, 0,
1360                                      shape, output, 1.0f, 0);
1361 }
1362
1363 LayerTestResult<float,4> DivisionTest(armnn::IWorkloadFactory& workloadFactory)
1364 {
1365     const unsigned int width = 2;
1366     const unsigned int height = 2;
1367     const unsigned int channelCount = 2;
1368     const unsigned int batchSize = 2;
1369
1370     unsigned int shape[] = { batchSize, channelCount, height, width };
1371
1372     std::vector<float> input0({
1373                                       2,  2,  2,  2,    3,  3,  3,  3,
1374                                       4,  4,  4,  4,    5,  5,  5,  5 });
1375
1376     std::vector<float> input1({
1377                                       1,  1,  1,  1,    2,  2,  2,  2,
1378                                       4,  4,  4,  4,    4,  4,  4,  4 });
1379
1380     std::vector<float> output({
1381                                       2,  2,  2,  2,    1.5,  1.5,  1.5,  1.5,
1382                                       1, 1, 1, 1,  1.25, 1.25, 1.25, 1.25 });
1383
1384
1385     return DivisionTestHelper<float>(workloadFactory,
1386                                      shape, input0, 1.0f, 0,
1387                                      shape, input1, 1.0f, 0,
1388                                      shape, output, 1.0f, 0);
1389 }
1390
1391 LayerTestResult<float, 4> DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
1392 {
1393     unsigned int shape0[] = { 1, 2, 2, 2 };
1394     std::vector<float> input0({ 2, 4, 6, 8, 10, 12, 14, 16});
1395
1396     unsigned int shape1[] = { 1, 1, 1, 1 };
1397     std::vector<float> input1({ 2 });
1398
1399     std::vector<float> output({ 1, 2, 3, 4, 5, 6, 7, 8});
1400
1401
1402     return DivisionTestHelper<float>(workloadFactory,
1403                                      shape0, input0, 1.0f, 0,
1404                                      shape1, input1, 1.0f, 0,
1405                                      shape0, output, 1.0f, 0);
1406 }
1407
1408 LayerTestResult<float, 4> DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
1409 {
1410     unsigned int shape0[] = { 1, 3, 3, 2 };
1411     std::vector<float> input0({
1412                                       1,   4,       3,  8,      5, 12,
1413                                       7,   16,      9, 20,     11, 24,
1414                                       13,  28,     15, 32,     17, 36});
1415
1416     unsigned int shape1[] = { 1, 1, 1, 2 };
1417     std::vector<float> input1({ 1, 2 });
1418
1419     std::vector<float> output({
1420                                       1,   2,      3,  4,      5,  6,
1421                                       7,   8,      9, 10,     11, 12,
1422                                       13, 14,     15, 16,     17, 18});
1423
1424     return DivisionTestHelper<float>(workloadFactory,
1425                                      shape0, input0, 1.0f, 0,
1426                                      shape1, input1, 1.0f, 0,
1427                                      shape0, output, 1.0f, 0);
1428 }
1429
1430
1431 LayerTestResult<uint8_t,4> DivisionUint8Test(armnn::IWorkloadFactory& workloadFactory)
1432 {
1433     const unsigned int width = 2;
1434     const unsigned int height = 2;
1435     const unsigned int channelCount = 2;
1436     const unsigned int batchSize = 2;
1437
1438     unsigned int shape[] = { batchSize, channelCount, height, width };
1439
1440     std::vector<uint8_t> input0({2,  2,  2,  2,    3,  3,  3,  3,
1441                                  4,  4,  4,  4,    5,  5,  5,  5 });
1442
1443     std::vector<uint8_t> input1({1,  1,  1,  1,    2,  2,  2,  2,
1444                                  4,  4,  4,  4,    4,  4,  4,  4 });
1445
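    // The output quantization scale is 0.25, so the stored uint8 values are the real
    // results divided by 0.25: 2/1=2.0 -> 8, 3/2=1.5 -> 6, 4/4=1.0 -> 4, 5/4=1.25 -> 5.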
1446     std::vector<uint8_t> output({8,  8,  8,  8,    6,  6,  6,  6,
1447                                  4,  4,  4,  4,    5,  5,  5,  5});
1448
1449
1450     return DivisionTestHelper<uint8_t>(workloadFactory,
1451                                      shape, input0, 1.0f,  0,
1452                                      shape, input1, 1.0f,  0,
1453                                      shape, output, 0.25f, 0);
1454 }
1455
1456 LayerTestResult<uint8_t, 4> DivisionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
1457 {
1458     unsigned int shape0[] = { 1, 2, 2, 2 };
1459     std::vector<uint8_t> input0({ 2, 4, 6, 8, 10, 12, 14, 16});
1460
1461     unsigned int shape1[] = { 1, 1, 1, 1 };
1462     std::vector<uint8_t> input1({ 2 });
1463
1464     std::vector<uint8_t> output({ 1, 2, 3, 4, 5, 6, 7, 8});
1465
1466     return DivisionTestHelper<uint8_t>(workloadFactory,
1467                                      shape0, input0, 1.0f, 0,
1468                                      shape1, input1, 1.0f, 0,
1469                                      shape0, output, 1.0f, 0);
1470 }
1471
1472 LayerTestResult<uint8_t, 4> DivisionBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory)
1473 {
1474     unsigned int shape0[] = { 1, 3, 3, 2 };
1475     std::vector<uint8_t> input0({1,   4,     3,  8,      5,  12,
1476                                  7,   16,    9,  20,     11, 24,
1477                                  13,  28,    15, 32,     17, 36});
1478
1479     unsigned int shape1[] = { 1, 1, 1, 2 };
1480     std::vector<uint8_t> input1({ 1, 2 });
1481
1482     std::vector<uint8_t> output({1,   2,      3,  4,      5,  6,
1483                                  7,   8,      9, 10,     11, 12,
1484                                  13, 14,     15, 16,     17, 18});
1485
1486     return DivisionTestHelper<uint8_t>(workloadFactory,
1487                                      shape0, input0, 1.0f, 0,
1488                                      shape1, input1, 1.0f, 0,
1489                                      shape0, output, 1.0f, 0);
1490 }
1491
1492 namespace {
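// Float32-only analogue of DivisionTestHelper above: builds and runs a Multiplication
// workload over the given inputs and returns the computed and expected outputs.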
1493 LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory,
1494                                                   const unsigned int shape0[4],
1495                                                   const std::vector<float> & values0,
1496                                                   const unsigned int shape1[4],
1497                                                   const std::vector<float> & values1,
1498                                                   const unsigned int outShape[4],
1499                                                   const std::vector<float> & outValues)
1500 {
1501     const size_t dimensionCount = 4;
1502     armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
1503     armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
1504     armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
1505
1506     auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
1507     auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
1508
1509     LayerTestResult<float,4> ret(outputTensorInfo);
1510
1511     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
1512     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
1513     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1514
1515     armnn::MultiplicationQueueDescriptor data;
1516     armnn::WorkloadInfo info;
1517     AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
1518     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
1519     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1520
1521     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
1522
1523     inputHandle0->Allocate();
1524     inputHandle1->Allocate();
1525     outputHandle->Allocate();
1526
1527     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
1528     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
1529
1530     workloadFactory.Finalize();
1531     workload->Execute();
1532
1533     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1534
1535     ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
1536     return ret;
1537 }
1538 } // anonymous namespace
1539
1540
1541 LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
1542 {
1543     const unsigned int width = 2;
1544     const unsigned int height = 2;
1545     const unsigned int channelCount = 2;
1546     const unsigned int batchSize = 2;
1547
1548     unsigned int shape[] = { batchSize, channelCount, height, width };
1549
1550     std::vector<float> input0({
1551         1,  1,  1,  1,    2,  2,  2,  2,
1552         3,  3,  3,  3,    4,  4,  4,  4 });
1553
1554     std::vector<float> input1({
1555         2,  2,  2,  2,    3,  3,  3,  3,
1556         4,  4,  4,  4,    5,  5,  5,  5 });
1557
1558     std::vector<float> output({
1559         2,  2,  2,  2,    6,  6,  6,  6,
1560         12, 12, 12, 12,  20, 20, 20, 20 });
1561
1562     return MultiplicationTestHelper(workloadFactory,
1563                                     shape,
1564                                     input0,
1565                                     shape,
1566                                     input1,
1567                                     shape,
1568                                     output);
1569 }
1570
1571 LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
1572 {
1573     unsigned int shape0[] = { 1, 2, 2, 2 };
1574     std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
1575
1576     unsigned int shape1[] = { 1, 1, 1, 1 };
1577     std::vector<float> input1({ 2 });
1578
1579     std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16});
1580
1581     return MultiplicationTestHelper(workloadFactory,
1582                                     shape0,
1583                                     input0,
1584                                     shape1,
1585                                     input1,
1586                                     shape0,
1587                                     output);
1588 }
1589
1590 LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
1591 {
1592     unsigned int shape0[] = { 1, 3, 3, 2 };
1593     std::vector<float> input0({
1594         1,   2,      3,  4,      5,  6,
1595         7,   8,      9, 10,     11, 12,
1596         13, 14,     15, 16,     17, 18});
1597
1598     unsigned int shape1[] = { 1, 1, 1, 2 };
1599     std::vector<float> input1({ 1, 2 });
1600
1601     std::vector<float> output({
1602         1,   4,       3,  8,      5, 12,
1603         7,   16,      9, 20,     11, 24,
1604         13,  28,     15, 32,     17, 36});
1605
1606     return MultiplicationTestHelper(workloadFactory,
1607                                     shape0,
1608                                     input0,
1609                                     shape1,
1610                                     input1,
1611                                     shape0,
1612                                     output);
1613 }
1614
1615 LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
1616                                           armnn::IWorkloadFactory& refWorkloadFactory)
1617 {
1618     const unsigned int width = 16;
1619     const unsigned int height = 32;
1620     const unsigned int channelCount = 2;
1621     const unsigned int batchSize = 5;
1622
1623     armnn::TensorInfo inputTensorInfo0;
1624     armnn::TensorInfo inputTensorInfo1;
1625     armnn::TensorInfo outputTensorInfo;
1626
1627     constexpr unsigned int shape[] = { batchSize, channelCount, height, width };
1628
1629     inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1630     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1631     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1632
1633     LayerTestResult<float,4> comparisonResult(outputTensorInfo);
1634
1635     auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992);
1636     auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257);
1637
1638     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
1639     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
1640     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1641
1642     std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0);
1643     std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
1644     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1645
1646     armnn::MultiplicationQueueDescriptor data;
1647     armnn::WorkloadInfo info;
1648     AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
1649     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
1650     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1651
1652     armnn::MultiplicationQueueDescriptor refData = data;
1653     armnn::WorkloadInfo refInfo = info;
1654     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get());
1655     SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get());
1656     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1657
1658     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
1659     std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo);
1660
1661     inputHandle0->Allocate();
1662     inputHandle1->Allocate();
1663     outputHandle->Allocate();
1664     inputHandle0Ref->Allocate();
1665     inputHandle1Ref->Allocate();
1666     outputHandleRef->Allocate();
1667
1668     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
1669     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
1670     CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]);
1671     CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
1672
1673     workloadFactory.Finalize();
1674     workload->Execute();
1675     refWorkloadFactory.Finalize();
1676     workloadRef->Execute();
1677
1678     CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get());
1679     CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get());
1680
1681     return comparisonResult;
1682 }
1683
1684 LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory,
1685                                      armnn::IWorkloadFactory& refWorkloadFactory)
1686 {
1687     const unsigned int width     = 2;
1688     const unsigned int height    = 3;
1689     const unsigned int channels  = 5;
1690     const unsigned int batchSize = 3;
1691
1692     armnn::TensorInfo inputTensorInfo;
1693     armnn::TensorInfo outputTensorInfo;
1694     armnn::TensorInfo tensorInfo;
1695
1696     constexpr unsigned int shape[]       = {batchSize, channels, height, width};
1697     constexpr unsigned int tensorShape[] = {channels};
1698
1699     inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1700     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1701     tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32);
1702
1703     auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312);
1704
1705     auto mean     = MakeRandomTensor<float, 1>(tensorInfo, 123);
1706     auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f);
1707     auto beta     = MakeRandomTensor<float, 1>(tensorInfo, 123);
1708     auto gamma    = MakeRandomTensor<float, 1>(tensorInfo, 345);
1709
1710     LayerTestResult<float,4> ret(outputTensorInfo);
1711
1712     std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
1713     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1714
1715     std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1716     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1717
1718     armnn::BatchNormalizationQueueDescriptor data;
1719     armnn::WorkloadInfo info;
1720     armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
1721     armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
1722     armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
1723     armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
1724
1725     AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
1726     AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
1727     AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
1728     AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
1729
1730     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1731     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1732     data.m_Mean             = &meanTensor;
1733     data.m_Variance         = &varianceTensor;
1734     data.m_Beta             = &betaTensor;
1735     data.m_Gamma            = &gammaTensor;
1736     data.m_Parameters.m_Eps = 0.01f;
1737
1738     armnn::BatchNormalizationQueueDescriptor refData = data;
1739     armnn::WorkloadInfo refInfo = info;
1740     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1741     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1742
1743     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
1744     std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo);
1745
1746     inputHandle->Allocate();
1747     outputHandle->Allocate();
1748     inputHandleRef->Allocate();
1749     outputHandleRef->Allocate();
1750
1751     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1752     CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1753
1754     workloadFactory.Finalize();
1755     workload->Execute();
1756     refWorkloadFactory.Finalize();
1757     workloadRef->Execute();
1758
1759     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1760     CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1761
1762     return ret;
1763 }
1764
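// Runs a Permute workload with the given mappings over inputData, writes the permuted
// data into outputData, and updates inputTensorInfo to describe the permuted shape.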
1765 template<typename T>
1766 void PermuteTensorData(
1767         armnn::IWorkloadFactory& workloadFactory,
1768         const armnn::PermutationVector& mappings,
1769         armnn::TensorInfo & inputTensorInfo,
1770         const T * inputData,
1771         std::vector<T>& outputData)
1772 {
1773     BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
1774     if (inputData == nullptr)
1775     {
1776         // Nullptr is an error in the test. By returning without doing the permutation
1777         // I expect the caller to fail the test. It still makes sense to report this as
1778         // an assert for Debug builds.
1779         return;
1780     }
1781
1782     armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
1783
1784     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1785     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1786
1787     armnn::PermuteQueueDescriptor queueDescriptor;
1788     queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings};
1789     armnn::WorkloadInfo workloadInfo;
1790     AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
1791     AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
1792
1793     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);
1794
1795     inputHandle->Allocate();
1796     outputHandle->Allocate();
1797
1798     CopyDataToITensorHandle(inputHandle.get(), inputData);
1799
1800     workload->Execute();
1801
1802     outputData.resize(outputTensorInfo.GetNumElements());
1803     CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
1804     inputTensorInfo = outputTensorInfo;
1805 }
1806
1807 armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation(
1808         const std::vector<armnn::TensorInfo> & inputTensorInfos,
1809         unsigned int concatDim)
1810 {
1811     std::vector<armnn::TensorShape> shapes;
1812     shapes.reserve(inputTensorInfos.size());
1813     for (const armnn::TensorInfo& it: inputTensorInfos)
1814     {
1815         shapes.push_back(it.GetShape());
1816     }
1817
1818     return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(),
1819                                                          shapes.end(),
1820                                                          concatDim);
1821 }
1822
1823 //
1824 // Concatenation is only supported for the N and C dimensions for NCHW. In the case
1825 // of <4 dimensions we need to make sure that the concat dimension is at least the
1826 // 3rd slowest iterating one.
1827 //
1828
1829 bool NeedPermuteForConcat(
1830         const std::vector<armnn::TensorInfo> & inputTensorInfos,
1831         unsigned int concatDim)
1832 {
1833     // See note above. Additionally we expect the input shapes to have the
1834     // same number of dimensions.
1835     unsigned int nDimensions = 0;
1836
1837     // Determine the number of dimensions, and sanity-check them
1838     // against test implementation issues.
1839     for (auto && tensorInfo : inputTensorInfos)
1840     {
1841         if (!nDimensions)
1842         {
1843             nDimensions = tensorInfo.GetShape().GetNumDimensions();
1844         }
1845         else
1846         {
1847             BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
1848                 "Input shapes must have the same number of dimensions");
1849         }
1850     }
1851
1852     return (nDimensions-concatDim) < 3;
1853 }
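// For example, with 4d NCHW inputs: concatDim 0 (N) gives 4 and concatDim 1 (C)
// gives 3, so no permute is needed; concatDim 2 (H) or 3 (W) gives a value below 3,
// so a permute is required. For 1d or 2d inputs every concatDim needs a permute.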
1854
1855 armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape)
1856 {
1857     unsigned int numDims = inputShape.GetNumDimensions();
1858     if (numDims >= 3)
1859     {
1860         // Nothing to do if the inputShape has at least 3 dimensions.
1861         return inputShape;
1862     }
1863
1864     std::vector<unsigned int> newDims(size_t(3), 1u);
1865     unsigned int expandedBy = 3 - numDims;
1866     for (unsigned int i=0; i<numDims; ++i)
1867     {
1868         newDims[expandedBy+i] = inputShape[i];
1869     }
1870     return armnn::TensorShape(3u, &newDims[0]);
1871 }
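// For example, a 1d shape { 5 } becomes { 1, 1, 5 } and a 2d shape { 2, 3 } becomes { 1, 2, 3 }.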
1872
1873 void Generate3dPermuteVectorForConcat(
1874         unsigned int numDimensions,
1875         unsigned int & concatDim,
1876         std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations)
1877 {
1878     BOOST_ASSERT_MSG(numDimensions <= 3,
1879        "Only dimensions 1,2 and 3 are supported by this helper");
1880
1881     unsigned int expandedBy = 3 - numDimensions;
1882     unsigned int expandedConcatAxis = concatDim + expandedBy;
1883
1884     if (expandedConcatAxis == 2)
1885     {
1886         concatDim = 0;
1887         armnn::PermutationVector forwardPermutation({1, 2, 0});
1888         armnn::PermutationVector reversePermutation({2, 0, 1});
1889         permutations = std::make_pair(forwardPermutation, reversePermutation);
1890     }
1891     else if (expandedConcatAxis == 1)
1892     {
1893         concatDim = 0;
1894         armnn::PermutationVector forwardPermutation({2, 0, 1});
1895         armnn::PermutationVector reversePermutation({1, 2, 0});
1896         permutations = std::make_pair(forwardPermutation, reversePermutation);
1897     }
1898     else
1899     {
1900         BOOST_ASSERT(expandedConcatAxis == 0);
1901         concatDim = 0;
1902     }
1903 }
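// For example, when the (expanded) concat axis is the innermost one (expandedConcatAxis
// == 2) of a shape (A, B, C): the forward permutation {1, 2, 0}, which maps source
// dimension i to destination dimension mappings[i], turns the shape into (C, A, B) so
// the concat axis ends up at position 0, and the reverse permutation {2, 0, 1} restores
// the original ordering afterwards.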
1904
1905 //
1906 // Permute the input tensors so we can do a supported concatenation.
1907 // Also treats tensors with fewer than 3 dimensions as 3d by adding dummy
1908 // 1 dimensions at the front. Finally, this function computes the output shape
1909 // of the permuted, concatenated tensor.
1910 //
1911 template <typename T>
1912 void PermuteInputsForConcat(
1913         armnn::IWorkloadFactory& workloadFactory,
1914         std::vector<armnn::TensorInfo> & inputTensorInfos,
1915         std::vector<T *> & inputData,
1916         std::vector<std::vector<T>> & inputDataStorage,
1917         armnn::PermutationVector & permuteVector,
1918         unsigned int & concatDim,
1919         armnn::TensorInfo & outputTensorInfo)
1920 {
1921     BOOST_ASSERT_MSG(inputTensorInfos.size() > 1,
1922         "Expecting more than one tensor to be concatenated here");
1923
1924     unsigned int numDims = 0;
1925     unsigned int nthInput = 0;
1926     const armnn::PermutationVector identity({0, 1, 2});
1927
1928     std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations =
1929         std::make_pair(identity, identity);
1930
1931     inputDataStorage.resize(inputData.size());
1932
1933     for (auto && tensorInfo : inputTensorInfos)
1934     {
1935         if (numDims == 0)
1936         {
1937             numDims = tensorInfo.GetShape().GetNumDimensions();
1938             Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);
1939             // Store the reverse permutation.
1940             permuteVector = permutations.second;
1941             BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity),
1942                 "Test logic error, we don't need permutation, so we shouldn't arrive here");
1943         }
1944         else
1945         {
1946             BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
1947                 "All inputs must have the same number of dimensions");
1948         }
1949
1950         armnn::TensorInfo newTensorInfo = tensorInfo;
1951         newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));
1952
1953         PermuteTensorData<T>(workloadFactory,
1954                              permutations.first,
1955                              newTensorInfo,
1956                              inputData[nthInput],
1957                              inputDataStorage[nthInput]);
1958
1959         inputData[nthInput] = inputDataStorage[nthInput].data();
1960         inputTensorInfos[nthInput] = newTensorInfo;
1961
1962         ++nthInput;
1963     }
1964
1965     outputTensorInfo.SetShape(
1966         armnnUtils::Permuted(
1967             ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
1968             permutations.first));
1969 }
1970
1971
1972 //
1973 // This is the counterpart of PermuteInputsForConcat(...): it permutes the
1974 // output of the concatenation back so we can check it against an expected
1975 // output.
1976 //
1977 template <typename T>
1978 void PermuteOutputForConcat(
1979         armnn::IWorkloadFactory& workloadFactory,
1980         const armnn::TensorInfo & tensorInfo,
1981         const armnn::PermutationVector & permuteVector,
1982         std::unique_ptr<armnn::ITensorHandle> && inputDataHandle,
1983         T * data)
1984 {
1985     BOOST_ASSERT_MSG(data != nullptr, "data must not be null");
1986     if (data == nullptr)
1987     {
1988         // Nullptr is an error in the test. By returning without doing the permutation
1989         // I expect the caller to fail the test. It still makes sense to report this as
1990         // an assert for Debug builds.
1991         return;
1992     }
1993
1994     armnn::TensorInfo resultTensorInfo = tensorInfo;
1995     std::vector<T> inputData(tensorInfo.GetNumElements());
1996     std::vector<T> outputData;
1997
1998     CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());
1999
2000     PermuteTensorData<T>(workloadFactory,
2001                          permuteVector,
2002                          resultTensorInfo,
2003                          &inputData[0],
2004                          outputData);
2005
2006     ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
2007 }
2008
2009 template <typename T>
2010 void Concatenate(armnn::IWorkloadFactory& workloadFactory,
2011                  std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig,
2012                  std::initializer_list<T *> inputsOrig,
2013                  const armnn::TensorInfo& outputTensorInfoOrig,
2014                  T * output,
2015                  unsigned int concatDim)
2016 {
2017     BOOST_ASSERT_MSG(output != nullptr, "output must not be null");
2018     if (output == nullptr)
2019     {
2020         // Nullptr is an error in the test. By returning without doing the concatenation
2021         // I expect the caller to fail the test. It still makes sense to report this as
2022         // an assert for Debug builds.
2023         return;
2024     }
2025
2026     armnn::MergerQueueDescriptor queueDescriptor;
2027
2028     // Saves a copy of the parameters which we might need to change.
2029     std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
2030     std::vector<T *> inputs            = inputsOrig;
2031     armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig;
2032
2033     armnn::PermutationVector permuteVector{0, 1, 2};
2034
2035     // Holds and automatically releases memory for the reshaped input data.
2036     std::vector<std::vector<T>> tmpInputDataStorage;
2037
2038     const size_t inputCount = inputTensorInfos.size();
2039
2040     bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);
2041
2042     if (needPermuteForConcat)
2043     {
2044         //
2045         // We need to permute the inputs, because concatenation along
2046         // the requested axis is not supported.
2047         //
2048         PermuteInputsForConcat<T>(workloadFactory,
2049                                   inputTensorInfos,
2050                                   inputs,
2051                                   tmpInputDataStorage,
2052                                   permuteVector,
2053                                   concatDim,
2054                                   outputTensorInfo);
2055     }
2056
2057     armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim);
2058
2059     queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
2060     for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
2061     {
2062         queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
2063             viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
2064     }
2065
2066     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2067
2068     std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
2069     inputHandles.reserve(inputCount);
2070
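    // When the backend supports sub-tensors, each input handle is created as a view into
    // the output tensor at the corresponding view origin, so copying the input data lands
    // directly in the output buffer; otherwise each input gets a standalone tensor handle.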
2071     const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
2072     for (unsigned int i = 0; i < inputCount; ++i)
2073     {
2074         const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i];
2075
2076         std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ?
2077             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(),
2078                 queueDescriptor.m_ViewOrigins[i].m_Origin.data())
2079             : workloadFactory.CreateTensorHandle(inputTensorInfo);
2080
2081         inputHandles.emplace_back(std::move(inputHandle));
2082     }
2083
2084     armnn::WorkloadInfo workloadInfo;
2085
2086     for (unsigned int i = 0; i < inputCount; ++i)
2087     {
2088         AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
2089     }
2090
2091     AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
2092
2093     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo);
2094
2095     for (auto& inputHandle : inputHandles)
2096     {
2097         inputHandle->Allocate();
2098     }
2099
2100     outputHandle->Allocate();
2101
2102     unsigned int nextInputId = 0;
2103     for (auto& inputHandle : inputHandles)
2104     {
2105         CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
2106         ++nextInputId;
2107     }
2108
2109     workloadFactory.Finalize();
2110     workload->Execute();
2111
2112     if (needPermuteForConcat)
2113     {
2114         PermuteOutputForConcat<T>(workloadFactory,
2115                                   outputTensorInfo,
2116                                   permuteVector,
2117                                   std::move(outputHandle),
2118                                   output);
2119     }
2120     else
2121     {
2122         CopyDataFromITensorHandle(output, outputHandle.get());
2123     }
2124 }
2125
2126 template <typename T>
2127 LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
2128 {
2129     armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType<T>());
2130
2131     auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f }));
2132     auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f }));
2133     auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f }));
2134
2135     armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType<T>());
2136
2137     LayerTestResult<T, 1> result(outputTensorInfo);
2138
2139     std::vector<T> output;
2140     output.resize(outputTensorInfo.GetNumElements());
2141     Concatenate<T>(workloadFactory,
2142         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
2143         { input0.data(), input1.data(), input2.data() },
2144         outputTensorInfo,
2145         output.data(),
2146         0);
2147
2148     result.output = MakeTensor<T, 1>(outputTensorInfo, output);
2149     result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2150         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
2151     }));
2152
2153     return result;
2154 }
2155
2156 LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory)
2157 {
2158     return Concatenation1dTestImpl<float>(workloadFactory, 0.0f, 0);
2159 }
2160
2161 template <typename T>
2162 LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
2163     const armnn::TensorInfo& outputTensorInfo,
2164     unsigned int dimension,
2165     const float qScale,
2166     const int32_t qOffset)
2167 {
2168     armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType<T>());
2169
2170     auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2171         // Batch 0
2172         1.0f, 2.0f, 3.0f,
2173
2174         // Batch 1
2175         10.0f, 11.0f, 12.0f,
2176     }));
2177
2178     auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2179         // Batch 0
2180         4.0f, 5.0f, 6.0f,
2181
2182         // Batch 1
2183         13.0f, 14.0f, 15.0f,
2184     }));
2185
2186     auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2187         // Batch 0
2188         7.0f, 8.0f, 9.0f,
2189
2190         // Batch 1
2191         16.0f, 17.0f, 18.0f,
2192     }));
2193
2194     LayerTestResult<T, 2> result(outputTensorInfo);
2195
2196     std::vector<T> output;
2197     output.resize(outputTensorInfo.GetNumElements());
2198     Concatenate<T>(workloadFactory,
2199         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
2200         { input0.data(), input1.data(), input2.data() },
2201         outputTensorInfo,
2202         output.data(),
2203         dimension);
2204
2205     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
2206     return result;
2207 }
2208
2209 template <typename T>
2210 LayerTestResult<T, 2> Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory,
2211     float qScale, int32_t qOffset)
2212 {
2213     armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>());
2214
2215     LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 0, qScale, qOffset);
2216     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2217         // Batch 0
2218         1.0f, 2.0f, 3.0f,
2219
2220         // Batch 1
2221         10.0f, 11.0f, 12.0f,
2222
2223         // Batch 2
2224         4.0f, 5.0f, 6.0f,
2225
2226         // Batch 3
2227         13.0f, 14.0f, 15.0f,
2228
2229         // Batch 4
2230         7.0f, 8.0f, 9.0f,
2231
2232         // Batch 5
2233         16.0f, 17.0f, 18.0f,
2234     }));
2235
2236     return result;
2237 }
2238
2239 LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory)
2240 {
2241     return Concatenation2dDim0TestImpl<float>(workloadFactory, 0.0f, 0);
2242 }
2243
2244 template <typename T>
2245 LayerTestResult<T, 2> Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory,
2246     float qScale, int32_t qOffset)
2247 {
2248     armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>());
2249
2250     LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset);
2251     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2252         // Batch 0
2253         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
2254
2255         // Batch 1
2256         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
2257     }));
2258
2259     return result;
2260 }
2261
2262 LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory)
2263 {
2264     return Concatenation2dDim1TestImpl<float>(workloadFactory, 0.0f, 0);
2265 }
2266
2267 template <typename T>
2268 LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2269     int32_t qOffset)
2270 {
2271     armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>());
2272     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2273         // Batch 0
2274         1.0f, 2.0f, 3.0f,
2275
2276         // Batch 1
2277         10.0f, 11.0f, 12.0f,
2278     }));
2279
2280     armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType<T>());
2281     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2282         // Batch 0
2283         4.0f, 5.0f, 6.0f,
2284
2285         // Batch 1
2286         13.0f, 14.0f, 15.0f,
2287
2288         // Batch 2
2289         7.0f, 8.0f, 9.0f,
2290     }));
2291
2292     armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType<T>());
2293     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2294         // Batch 0
2295         16.0f, 17.0f, 18.0f,
2296     }));
2297
2298     armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>());
2299     LayerTestResult<T, 2> result(outputTensorInfo);
2300
2301     std::vector<T> output;
2302     output.resize(outputTensorInfo.GetNumElements());
2303     Concatenate<T>(workloadFactory,
2304         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2305         { input0.data(), input1.data(), input2.data() },
2306         outputTensorInfo,
2307         output.data(),
2308         0);
2309
2310     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
2311     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2312         // Batch 0
2313         1.0f, 2.0f, 3.0f,
2314
2315         // Batch 1
2316         10.0f, 11.0f, 12.0f,
2317
2318         // Batch 2
2319         4.0f, 5.0f, 6.0f,
2320
2321         // Batch 3
2322         13.0f, 14.0f, 15.0f,
2323
2324         // Batch 4
2325         7.0f, 8.0f, 9.0f,
2326
2327         // Batch 5
2328         16.0f, 17.0f, 18.0f,
2329     }));
2330
2331     return result;
2332 }
2333
2334 LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2335 {
2336     return Concatenation2dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2337 }
2338
2339 template <typename T>
2340 LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2341     int32_t qOffset)
2342 {
2343     armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>());
2344     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2345         // Batch 0
2346         1.0f, 2.0f, 3.0f,
2347
2348         // Batch 1
2349         10.0f, 11.0f, 12.0f,
2350     }));
2351
2352     armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType<T>());
2353     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2354         // Batch 0
2355         4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
2356
2357         // Batch 1
2358         13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
2359     }));
2360
2361     armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType<T>());
2362     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2363         // Batch 0
2364         9.0f,
2365
2366         // Batch 1
2367         18.0f
2368     }));
2369
2370     armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>());
2371     LayerTestResult<T, 2> result(outputTensorInfo);
2372
2373     std::vector<T> output;
2374     output.resize(outputTensorInfo.GetNumElements());
2375     Concatenate<T>(workloadFactory,
2376         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2377         { input0.data(), input1.data(), input2.data() },
2378         outputTensorInfo,
2379         output.data(),
2380         1);
2381
2382     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
2383     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2384         // Batch 0
2385         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
2386
2387         // Batch 1
2388         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
2389     }));
2390
2391     return result;
2392 }
2393
2394 LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2395 {
2396     return Concatenation2dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2397 }
2398
2399 template <typename T>
2400 LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory,
2401     const armnn::TensorInfo& outputTensorInfo,
2402     unsigned int dimension,
2403     float qScale,
2404     int32_t qOffset)
2405 {
2406     armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2407
2408     auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2409         // Batch 0, Channel 0
2410         1.0f, 2.0f,
2411
2412         // Batch 0, Channel 1
2413         3.0f, 4.0f,
2414
2415         // Batch 0, Channel 2
2416         5.0f, 6.0f,
2417
2418         // Batch 1, Channel 0
2419         19.0f, 20.0f,
2420
2421         // Batch 1, Channel 1
2422         21.0f, 22.0f,
2423
2424         // Batch 1, Channel 2
2425         23.0f, 24.0f
2426     }));
2427
2428     auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2429         // Batch 0, Channel 0
2430         7.0f, 8.0f,
2431
2432         // Batch 0, Channel 1
2433         9.0f, 10.0f,
2434
2435         // Batch 0, Channel 2
2436         11.0f, 12.0f,
2437
2438         // Batch 1, Channel 0
2439         25.0f, 26.0f,
2440
2441         // Batch 1, Channel 1
2442         27.0f, 28.0f,
2443
2444         // Batch 1, Channel 2
2445         29.0f, 30.0f
2446     }));
2447
2448     auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2449         // Batch 0, Channel 0
2450         13.0f, 14.0f,
2451
2452         // Batch 0, Channel 1
2453         15.0f, 16.0f,
2454
2455         // Batch 0, Channel 2
2456         17.0f, 18.0f,
2457
2458         // Batch 1, Channel 0
2459         31.0f, 32.0f,
2460
2461         // Batch 1, Channel 1
2462         33.0f, 34.0f,
2463
2464         // Batch 1, Channel 2
2465         35.0f, 36.0f
2466     }));
2467
2468     LayerTestResult<T, 3> result(outputTensorInfo);
2469
2470     std::vector<T> output;
2471     output.resize(outputTensorInfo.GetNumElements());
2472     Concatenate<T>(workloadFactory,
2473         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
2474         { input0.data(), input1.data(), input2.data() },
2475         outputTensorInfo,
2476         output.data(),
2477         dimension);
2478
2479     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2480     return result;
2481 }
2482
2483 template <typename T>
2484 LayerTestResult<T, 3> Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2485     int32_t qOffset)
2486 {
2487     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>());
2488
2489     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 0,
2490         qScale, qOffset);
2491     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2492         // Batch 0, Channel 0
2493         1.0f, 2.0f,
2494
2495         // Batch 0, Channel 1
2496         3.0f, 4.0f,
2497
2498         // Batch 0, Channel 2
2499         5.0f, 6.0f,
2500
2501         // Batch 1, Channel 0
2502         19.0f, 20.0f,
2503
2504         // Batch 1, Channel 1
2505         21.0f, 22.0f,
2506
2507         // Batch 1, Channel 2
2508         23.0f, 24.0f,
2509
2510         // Batch 2, Channel 0
2511         7.0f, 8.0f,
2512
2513         // Batch 2, Channel 1
2514         9.0f, 10.0f,
2515
2516         // Batch 2, Channel 2
2517         11.0f, 12.0f,
2518
2519         // Batch 3, Channel 0
2520         25.0f, 26.0f,
2521
2522         // Batch 3, Channel 1
2523         27.0f, 28.0f,
2524
2525         // Batch 3, Channel 2
2526         29.0f, 30.0f,
2527
2528         // Batch 4, Channel 0
2529         13.0f, 14.0f,
2530
2531         // Batch 4, Channel 1
2532         15.0f, 16.0f,
2533
2534         // Batch 4, Channel 2
2535         17.0f, 18.0f,
2536
2537         // Batch 5, Channel 0
2538         31.0f, 32.0f,
2539
2540         // Batch 5, Channel 1
2541         33.0f, 34.0f,
2542
2543         // Batch 5, Channel 2
2544         35.0f, 36.0f
2545     }));
2546     return result;
2547 }
2548
2549 LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory)
2550 {
2551     return Concatenation3dDim0TestImpl<float>(workloadFactory, 0.0f, 0);
2552 }
2553
2554 template <typename T>
2555 LayerTestResult<T, 3> Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory,
2556     float qScale, int32_t qOffset)
2557 {
2558     armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType<T>());
2559
2560     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset);
2561     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2562         // Batch 0, Channel 0
2563         1.0f, 2.0f,
2564
2565         // Batch 0, Channel 1
2566         3.0f, 4.0f,
2567
2568         // Batch 0, Channel 2
2569         5.0f, 6.0f,
2570
2571         // Batch 0, Channel 3
2572         7.0f, 8.0f,
2573
2574         // Batch 0, Channel 4
2575         9.0f, 10.0f,
2576
2577         // Batch 0, Channel 5
2578         11.0f, 12.0f,
2579
2580         // Batch 0, Channel 6
2581         13.0f, 14.0f,
2582
2583         // Batch 0, Channel 7
2584         15.0f, 16.0f,
2585
2586         // Batch 0, Channel 8
2587         17.0f, 18.0f,
2588
2589         // Batch 1, Channel 0
2590         19.0f, 20.0f,
2591
2592         // Batch 1, Channel 1
2593         21.0f, 22.0f,
2594
2595         // Batch 1, Channel 2
2596         23.0f, 24.0f,
2597
2598         // Batch 1, Channel 3
2599         25.0f, 26.0f,
2600
2601         // Batch 1, Channel 4
2602         27.0f, 28.0f,
2603
2604         // Batch 1, Channel 5
2605         29.0f, 30.0f,
2606
2607         // Batch 1, Channel 6
2608         31.0f, 32.0f,
2609
2610         // Batch 1, Channel 7
2611         33.0f, 34.0f,
2612
2613         // Batch 1, Channel 8
2614         35.0f, 36.0f
2615     }));
2616
2617     return result;
2618 }
2619
2620 LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory)
2621 {
2622     return Concatenation3dDim1TestImpl<float>(workloadFactory, 0.0f, 0);
2623 }
2624
2625 template <typename T>
2626 LayerTestResult<T, 3> Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory,
2627     float qScale, int32_t qOffset)
2628 {
2629     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>());
2630
2631     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 2, qScale, qOffset);
2632     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2633         // Batch 0, Channel 0
2634         1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
2635
2636         // Batch 0, Channel 1
2637         3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
2638
2639         // Batch 0, Channel 2
2640         5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
2641
2642         // Batch 1, Channel 0
2643         19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
2644
2645         // Batch 1, Channel 1
2646         21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
2647
2648         // Batch 1, Channel 2
2649         23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
2650     }));
2651
2652     return result;
2653 }
2654
2655 LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory)
2656 {
2657     return Concatenation3dDim2TestImpl<float>(workloadFactory, 0.0f, 0);
2658 }
2659
2660 template <typename T>
2661 LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2662     int32_t qOffset)
2663 {
2664     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2665     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2666             // Batch 0, Channel 0
2667             1.0f, 2.0f,
2668
2669             // Batch 0, Channel 1
2670             3.0f, 4.0f,
2671
2672             // Batch 0, Channel 2
2673             5.0f, 6.0f,
2674
2675             // Batch 1, Channel 0
2676             19.0f, 20.0f,
2677
2678             // Batch 1, Channel 1
2679             21.0f, 22.0f,
2680
2681             // Batch 1, Channel 2
2682             23.0f, 24.0f
2683     }));
2684
2685     armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType<T>());
2686     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2687             // Batch 0, Channel 0
2688             7.0f, 8.0f,
2689
2690             // Batch 0, Channel 1
2691             9.0f, 10.0f,
2692
2693             // Batch 0, Channel 2
2694             11.0f, 12.0f,
2695     }));
2696
2697     armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType<T>());
2698     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2699             // Batch 0, Channel 0
2700             25.0f, 26.0f,
2701
2702             // Batch 0, Channel 1
2703             27.0f, 28.0f,
2704
2705             // Batch 0, Channel 2
2706             29.0f, 30.0f,
2707
2708             // Batch 1, Channel 0
2709             13.0f, 14.0f,
2710
2711             // Batch 1, Channel 1
2712             15.0f, 16.0f,
2713
2714             // Batch 1, Channel 2
2715             17.0f, 18.0f,
2716
2717             // Batch 2, Channel 0
2718             31.0f, 32.0f,
2719
2720             // Batch 2, Channel 1
2721             33.0f, 34.0f,
2722
2723             // Batch 2, Channel 2
2724             35.0f, 36.0f
2725     }));
2726
2727     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>());
2728     LayerTestResult<T, 3> result(outputTensorInfo);
2729
2730     std::vector<T> output;
2731     output.resize(outputTensorInfo.GetNumElements());
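         // Concatenate along dimension 0 (the final argument): the inputs' batch sizes 2 + 1 + 3
         // add up to the 6 batches of the output shape.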
2732     Concatenate<T>(workloadFactory,
2733         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2734         { input0.data(), input1.data(), input2.data() },
2735         outputTensorInfo,
2736         output.data(),
2737         0);
2738
2739     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2740     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2741         // Batch 0, Channel 0
2742         1.0f, 2.0f,
2743
2744         // Batch 0, Channel 1
2745         3.0f, 4.0f,
2746
2747         // Batch 0, Channel 2
2748         5.0f, 6.0f,
2749
2750         // Batch 1, Channel 0
2751         19.0f, 20.0f,
2752
2753         // Batch 1, Channel 1
2754         21.0f, 22.0f,
2755
2756         // Batch 1, Channel 2
2757         23.0f, 24.0f,
2758
2759         // Batch 2, Channel 0
2760         7.0f, 8.0f,
2761
2762         // Batch 2, Channel 1
2763         9.0f, 10.0f,
2764
2765         // Batch 2, Channel 2
2766         11.0f, 12.0f,
2767
2768         // Batch 3, Channel 0
2769         25.0f, 26.0f,
2770
2771         // Batch 3, Channel 1
2772         27.0f, 28.0f,
2773
2774         // Batch 3, Channel 2
2775         29.0f, 30.0f,
2776
2777         // Batch 4, Channel 0
2778         13.0f, 14.0f,
2779
2780         // Batch 4, Channel 1
2781         15.0f, 16.0f,
2782
2783         // Batch 4, Channel 2
2784         17.0f, 18.0f,
2785
2786         // Batch 5, Channel 0
2787         31.0f, 32.0f,
2788
2789         // Batch 5, Channel 1
2790         33.0f, 34.0f,
2791
2792         // Batch 5, Channel 2
2793         35.0f, 36.0f
2794     }));
2795
2796     return result;
2797 }
2798
2799 LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2800 {
2801     return Concatenation3dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2802 }
2803
2804 template <typename T>
2805 LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2806     int32_t qOffset)
2807 {
2808     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2809     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2810         // Batch 0, Channel 0
2811         1.0f, 2.0f,
2812
2813         // Batch 0, Channel 1
2814         3.0f, 4.0f,
2815
2816         // Batch 0, Channel 2
2817         5.0f, 6.0f,
2818
2819         // Batch 1, Channel 0
2820         19.0f, 20.0f,
2821
2822         // Batch 1, Channel 1
2823         21.0f, 22.0f,
2824
2825         // Batch 1, Channel 2
2826         23.0f, 24.0f
2827     }));
2828
2829     armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType<T>());
2830     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2831         // Batch 0, Channel 0
2832         7.0f, 8.0f,
2833
2834         // Batch 0, Channel 1
2835         9.0f, 10.0f,
2836
2837         // Batch 0, Channel 2
2838         11.0f, 12.0f,
2839
2840         // Batch 0, Channel 3
2841         25.0f, 26.0f,
2842
2843         // Batch 1, Channel 0
2844         27.0f, 28.0f,
2845
2846         // Batch 1, Channel 1
2847         29.0f, 30.0f,
2848
2849         // Batch 1, Channel 2
2850         13.0f, 14.0f,
2851
2852         // Batch 1, Channel 3
2853         15.0f, 16.0f,
2854     }));
2855
2856     armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType<T>());
2857     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2858         // Batch 0, Channel 0
2859         17.0f, 18.0f,
2860
2861         // Batch 1, Channel 0
2862         31.0f, 32.0f,
2863     }));
2864
2865     armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType<T>());
2866     LayerTestResult<T, 3> result(outputTensorInfo);
2867
2868     std::vector<T> output;
2869     output.resize(outputTensorInfo.GetNumElements());
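         // Concatenate along dimension 1: the inputs' channel counts 3 + 4 + 1 add up to the
         // output's 8 channels.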
2870     Concatenate<T>(workloadFactory,
2871         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2872         { input0.data(), input1.data(), input2.data() },
2873         outputTensorInfo,
2874         output.data(),
2875         1);
2876
2877     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2878     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2879         // Batch 0, Channel 0
2880         1.0f, 2.0f,
2881
2882         // Batch 0, Channel 1
2883         3.0f, 4.0f,
2884
2885         // Batch 0, Channel 2
2886         5.0f, 6.0f,
2887
2888         // Batch 0, Channel 3
2889         7.0f, 8.0f,
2890
2891         // Batch 0, Channel 4
2892         9.0f, 10.0f,
2893
2894         // Batch 0, Channel 5
2895         11.0f, 12.0f,
2896
2897         // Batch 0, Channel 6
2898         25.0f, 26.0f,
2899
2900         // Batch 0, Channel 7
2901         17.0f, 18.0f,
2902
2903         // Batch 1, Channel 0
2904         19.0f, 20.0f,
2905
2906         // Batch 1, Channel 1
2907         21.0f, 22.0f,
2908
2909         // Batch 1, Channel 2
2910         23.0f, 24.0f,
2911
2912         // Batch 1, Channel 3
2913         27.0f, 28.0f,
2914
2915         // Batch 1, Channel 4
2916         29.0f, 30.0f,
2917
2918         // Batch 1, Channel 5
2919         13.0f, 14.0f,
2920
2921         // Batch 1, Channel 6
2922         15.0f, 16.0f,
2923
2924         // Batch 1, Channel 7
2925         31.0f, 32.0f,
2926     }));
2927
2928     return result;
2929 }
2930
2931 LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2932 {
2933     return Concatenation3dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2934 }
2935
2936 template <typename T>
2937 LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2938     int32_t qOffset)
2939 {
2940     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2941     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2942         // Batch 0, Channel 0
2943         1.0f, 2.0f,
2944
2945         // Batch 0, Channel 1
2946         3.0f, 4.0f,
2947
2948         // Batch 0, Channel 2
2949         5.0f, 6.0f,
2950
2951         // Batch 1, Channel 0
2952         19.0f, 20.0f,
2953
2954         // Batch 1, Channel 1
2955         21.0f, 22.0f,
2956
2957         // Batch 1, Channel 2
2958         23.0f, 24.0f
2959     }));
2960
2961     armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType<T>());
2962     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2963         // Batch 0, Channel 0
2964         7.0f,
2965
2966         // Batch 0, Channel 1
2967         9.0f,
2968
2969         // Batch 0, Channel 2
2970         11.0f,
2971
2972         // Batch 1, Channel 0
2973         25.0f,
2974
2975         // Batch 1, Channel 1
2976         27.0f,
2977
2978         // Batch 1, Channel 2
2979         29.0f
2980     }));
2981
2982     armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType<T>());
2983     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2984         // Batch 0, Channel 0
2985         13.0f, 14.0f, 50.0f,
2986
2987         // Batch 0, Channel 1
2988         15.0f, 16.0f, 51.0f,
2989
2990         // Batch 0, Channel 2
2991         17.0f, 18.0f, 52.0f,
2992
2993         // Batch 1, Channel 0
2994         31.0f, 32.0f, 53.0f,
2995
2996         // Batch 1, Channel 1
2997         33.0f, 34.0f, 54.0f,
2998
2999         // Batch 1, Channel 2
3000         35.0f, 36.0f, 55.0f,
3001     }));
3002
3003     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>());
3004     LayerTestResult<T, 3> result(outputTensorInfo);
3005
3006     std::vector<T> output;
3007     output.resize(outputTensorInfo.GetNumElements());
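         // Concatenate along dimension 2: the inputs' widths 2 + 1 + 3 add up to the output's width of 6.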
3008     Concatenate<T>(workloadFactory,
3009         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
3010         { input0.data(), input1.data(), input2.data() },
3011         outputTensorInfo,
3012         output.data(),
3013         2);
3014
3015     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
3016     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
3017         // Batch 0, Channel 0
3018         1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
3019
3020         // Batch 0, Channel 1
3021         3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
3022
3023         // Batch 0, Channel 2
3024         5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
3025
3026         // Batch 1, Channel 0
3027         19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
3028
3029         // Batch 1, Channel 1
3030         21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
3031
3032         // Batch 1, Channel 2
3033         23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
3034     }));
3035
3036     return result;
3037 }
3038
3039 LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
3040 {
3041     return Concatenation3dDim2DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
3042 }
3043
3044 LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory,
3045                                                 const armnn::DataLayoutIndexed& dataLayout)
3046 {
3047     const armnn::TensorInfo inputTensorInfo = GetTensorInfo<float>(1, 2, 4, 4, dataLayout);
3048     const armnn::TensorInfo outputTensorInfo = GetTensorInfo<float>(1, 2, 4, 4, dataLayout);
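         // Input and output dimensions are identical, so the bilinear resize is expected to be a
         // no-op and the output should match the input exactly.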
3049
3050     std::vector<float> inputData({
3051         1.0f, 2.0f, 3.0f, 4.0f,
3052         2.0f, 3.0f, 4.0f, 5.0f,
3053         3.0f, 4.0f, 5.0f, 6.0f,
3054         4.0f, 5.0f, 6.0f, 7.0f,
3055
3056         1.0f, 2.0f, 3.0f, 4.0f,
3057         2.0f, 3.0f, 4.0f, 5.0f,
3058         3.0f, 4.0f, 5.0f, 6.0f,
3059         4.0f, 5.0f, 6.0f, 7.0f
3060     });
3061
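         // The reference data above is authored in NCHW order; when the test runs in NHWC it is
         // permuted into that layout first. Each entry of the permutation vector gives the
         // destination index of the corresponding NCHW dimension: N->0, C->3, H->1, W->2.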
3062     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
3063     if (dataLayout.GetDataLayout() == armnn::DataLayout::NHWC)
3064     {
3065         std::vector<float> tmp(inputData.size());
3066         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
3067         inputData = tmp;
3068     }
3069
3070     auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
3071
3072     LayerTestResult<float, 4> result(outputTensorInfo);
3073     result.outputExpected = input;
3074
3075     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3076     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3077
3078     armnn::ResizeBilinearQueueDescriptor descriptor;
3079     descriptor.m_Parameters.m_DataLayout = dataLayout;
3080     armnn::WorkloadInfo info;
3081     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3082     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3083
3084     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3085
3086     inputHandle->Allocate();
3087     outputHandle->Allocate();
3088     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3089
3090     workloadFactory.Finalize();
3091     workload->Execute();
3092
3093     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3094     return result;
3095 }
3096
3097 LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory,
3098                                                    const armnn::DataLayoutIndexed& dataLayout)
3099 {
3100     const armnn::TensorInfo inputTensorInfo = GetTensorInfo<float>(1, 2, 2, 2, dataLayout);
3101     const armnn::TensorInfo outputTensorInfo = GetTensorInfo<float>(1, 2, 1, 1, dataLayout);
3102
3103     std::vector<float> inputData({
3104           1.0f, 255.0f,
3105         200.0f, 250.0f,
3106
3107         250.0f, 200.0f,
3108         250.0f,   1.0f
3109     });
3110
3111     // The 'resize bilinear' operation projects the top-left corner of output texels into the input image,
3112     // then figures out the interpolants and weights. Note this is different to projecting the centre of the
3113     // output texel. Thus, for an input matrix of 2x2, we'll expect the output 1x1 matrix to contain, as
3114     // its single element, the value that was at position (0,0) of the input matrix (rather than an average,
3115     // which we would expect if projecting the centre).
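         // An illustrative sketch of that corner-projection arithmetic (not part of the test itself):
         //     scaleX = inWidth / outWidth = 2 / 1 = 2
         //     srcX   = dstX * scaleX      = 0 * 2 = 0      (and likewise for Y)
         // so each channel's single output value is simply input(0,0): 1.0f and 250.0f respectively.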
3116
3117     std::vector<float> outputData({
3118           1.0f,
3119
3120         250.0f
3121     });
3122
3123     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
3124     if (dataLayout.GetDataLayout() == armnn::DataLayout::NHWC)
3125     {
3126         std::vector<float> tmp(inputData.size());
3127         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
3128         inputData = tmp;
3129
3130         std::vector<float> tmp1(outputData.size());
3131         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
3132         outputData = tmp1;
3133     }
3134
3135     auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
3136
3137     LayerTestResult<float, 4> result(outputTensorInfo);
3138     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
3139
3140     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3141     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3142
3143     armnn::ResizeBilinearQueueDescriptor descriptor;
3144     descriptor.m_Parameters.m_DataLayout = dataLayout;
3145     armnn::WorkloadInfo info;
3146     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3147     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3148
3149     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3150
3151     inputHandle->Allocate();
3152     outputHandle->Allocate();
3153     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3154
3155     workloadFactory.Finalize();
3156     workload->Execute();
3157
3158     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3159     return result;
3160 }
3161
3162 LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory,
3163                                                   const armnn::DataLayoutIndexed& dataLayout)
3164 {
3165     const armnn::TensorInfo inputTensorInfo = GetTensorInfo<float>(1, 2, 4, 4, dataLayout);
3166     const armnn::TensorInfo outputTensorInfo = GetTensorInfo<float>(1, 2, 2, 2, dataLayout);
3167
3168     std::vector<float> inputData({
3169         1.0f, 2.0f, 3.0f, 4.0f,
3170         2.0f, 3.0f, 4.0f, 5.0f,
3171         3.0f, 4.0f, 5.0f, 6.0f,
3172         4.0f, 5.0f, 6.0f, 7.0f,
3173
3174         7.0f, 6.0f, 5.0f, 4.0f,
3175         6.0f, 5.0f, 4.0f, 3.0f,
3176         5.0f, 4.0f, 3.0f, 2.0f,
3177         4.0f, 3.0f, 2.0f, 1.0f
3178     });
3179
3180     std::vector<float> outputData({
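         // With a uniform scale factor of 2 and corner projection, the output samples the input at
         // every other row and column: positions (0,0), (0,2), (2,0) and (2,2) in each channel.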
3181         1.0f, 3.0f,
3182         3.0f, 5.0f,
3183
3184         7.0f, 5.0f,
3185         5.0f, 3.0f
3186     });
3187
3188     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
3189     if (dataLayout.GetDataLayout() == armnn::DataLayout::NHWC)
3190     {
3191         std::vector<float> tmp(inputData.size());
3192         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
3193         inputData = tmp;
3194
3195         std::vector<float> tmp1(outputData.size());
3196         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
3197         outputData = tmp1;
3198     }
3199
3200     auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
3201
3202     LayerTestResult<float, 4> result(outputTensorInfo);
3203     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
3204
3205     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3206     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3207
3208     armnn::ResizeBilinearQueueDescriptor descriptor;
3209     descriptor.m_Parameters.m_DataLayout = dataLayout;
3210     armnn::WorkloadInfo info;
3211     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3212     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3213
3214     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3215
3216     inputHandle->Allocate();
3217     outputHandle->Allocate();
3218     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3219
3220     workloadFactory.Finalize();
3221     workload->Execute();
3222
3223     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3224     return result;
3225 }
3226
3227 LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory,
3228                                                 const armnn::DataLayoutIndexed& dataLayout)
3229 {
3230     const armnn::TensorInfo inputTensorInfo = GetTensorInfo<float>(1, 2, 3, 5, dataLayout);
3231     const armnn::TensorInfo outputTensorInfo = GetTensorInfo<float>(1, 2, 2, 3, dataLayout);
3232
3233     std::vector<float> inputData({
3234           1.0f,   2.0f,   3.0f,   5.0f,   8.0f,
3235          13.0f,  21.0f,  34.0f,  55.0f,  89.0f,
3236         144.0f, 233.0f, 377.0f, 610.0f, 987.0f,
3237
3238         987.0f, 610.0f, 377.0f, 233.0f, 144.0f,
3239          89.0f,  55.0f,  34.0f,  21.0f,  13.0f,
3240           8.0f,   5.0f,   3.0f,   2.0f,   1.0f
3241     });
3242
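         // The scale factors here are 3/2 vertically and 5/3 horizontally, so the expected values
         // below are genuinely interpolated (quoted to roughly four decimal places) rather than
         // direct samples of the input.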
3243     std::vector<float> outputData({
3244           1.0f,   2.6666f,   6.00f,
3245          78.5f, 179.3333f, 401.00f,
3246
3247         987.0f, 454.6670f, 203.33f,
3248          48.5f,  22.3333f,  10.00f
3249     });
3250
3251     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
3252     if (dataLayout.GetDataLayout() == armnn::DataLayout::NHWC)
3253     {
3254         std::vector<float> tmp(inputData.size());
3255         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
3256         inputData = tmp;
3257
3258         std::vector<float> tmp1(outputData.size());
3259         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
3260         outputData = tmp1;
3261     }
3262
3263     auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
3264
3265     LayerTestResult<float, 4> result(outputTensorInfo);
3266     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
3267
3268     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3269     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3270
3271     armnn::ResizeBilinearQueueDescriptor descriptor;
3272     descriptor.m_Parameters.m_DataLayout = dataLayout;
3273     armnn::WorkloadInfo info;
3274     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3275     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3276
3277     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3278
3279     inputHandle->Allocate();
3280     outputHandle->Allocate();
3281     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3282
3283     workloadFactory.Finalize();
3284     workload->Execute();
3285
3286     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3287     return result;
3288 }
3289
3290 LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory,
3291                                                 const armnn::DataLayoutIndexed& dataLayout)
3292 {
3293     const armnn::TensorInfo inputTensorInfo = GetTensorInfo<float>(1, 2, 3, 2, dataLayout);
3294     const armnn::TensorInfo outputTensorInfo = GetTensorInfo<float>(1, 2, 3, 5, dataLayout);
3295
3296     std::vector<float> inputData({
3297           1.0f,   2.0f,
3298          13.0f,  21.0f,
3299         144.0f, 233.0f,
3300
3301         233.0f, 144.0f,
3302          21.0f,  13.0f,
3303           2.0f,   1.0f
3304     });
3305
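         // Magnifying the width from 2 to 5 advances the source x by 0.4 per output pixel, producing
         // the interpolated ramps below; samples projected past the last input column clamp to the
         // edge value.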
3306     std::vector<float> outputData({
3307           1.0f,   1.4f,   1.8f,   2.0f,   2.0f,
3308          13.0f,  16.2f,  19.4f,  21.0f,  21.0f,
3309         144.0f, 179.6f, 215.2f, 233.0f, 233.0f,
3310
3311         233.0f, 197.4f, 161.8f, 144.0f, 144.0f,
3312          21.0f,  17.8f,  14.6f,  13.0f,  13.0f,
3313           2.0f,   1.6f,   1.2f,   1.0f,   1.0f
3314     });
3315
3316     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
3317     if (dataLayout.GetDataLayout() == armnn::DataLayout::NHWC)
3318     {
3319         std::vector<float> tmp(inputData.size());
3320         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
3321         inputData = tmp;
3322
3323         std::vector<float> tmp1(outputData.size());
3324         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
3325         outputData = tmp1;
3326     }
3327
3328     auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
3329
3330     LayerTestResult<float, 4> result(outputTensorInfo);
3331     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
3332
3333     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3334     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3335
3336     armnn::ResizeBilinearQueueDescriptor descriptor;
3337     descriptor.m_Parameters.m_DataLayout = dataLayout;
3338     armnn::WorkloadInfo info;
3339     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3340     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3341
3342     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3343
3344     inputHandle->Allocate();
3345     outputHandle->Allocate();
3346     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3347
3348     workloadFactory.Finalize();
3349     workload->Execute();
3350
3351     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3352     return result;
3353 }
3354
3355 LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory)
3356 {
3357     constexpr unsigned int width = 2;
3358     constexpr unsigned int height = 3;
3359
3360     const armnn::TensorInfo tensorInfo({ height, width },
3361         armnn::DataType::Float32);
3362     auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
3363        -10.0f,  -5.0f,
3364          0.0f,   5.0f,
3365         10.0f,  10.0f
3366     }));
3367
3368     LayerTestResult<float, 2> ret(tensorInfo);
3369
3370     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
3371
3372     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
3373
3374     armnn::FakeQuantizationQueueDescriptor data;
3375     armnn::WorkloadInfo info;
3376
3377     AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
3378     AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
3379     float min = -10.f;
3380     float max = 10.f;
3381
3382     data.m_Parameters.m_Min = min;
3383     data.m_Parameters.m_Max = max;
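         // FakeQuantization maps the [m_Min, m_Max] range onto [0, 255]: with [-10, 10] the
         // endpoints land on 0 and 255 and the midpoint 0.0f lands on 128, as the expected
         // values below show.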
3384
3385     armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]);
3386     armnn::FakeQuantizationQueueDescriptor refData = data;
3387     armnn::WorkloadInfo refInfo = info;
3388     SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle);
3389
3390     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info);
3391
3392     inputHandle->Allocate();
3393     outputHandle->Allocate();
3394
3395     CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
3396
3397     workloadFactory.Finalize();
3398     workload->Execute();
3399
3400     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
3401
3402     ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
3403         0.0f,     63.0f,
3404         128.0f,   191.0f,
3405         255.0f,   255.0f
3406     }));
3407     return ret;
3408 }
3409
3410 namespace
3411 {
3412
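     // Shared implementation for the L2Normalization tests below. Each element is scaled by the
     // inverse L2 norm of the values sharing its (batch, height, width) position across the
     // channel dimension:
     //     out[n][c][h][w] = in[n][c][h][w] / sqrt(sum over c' of in[n][c'][h][w]^2)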
3413 LayerTestResult<float, 4> L2NormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory,
3414                                                   const armnn::TensorShape& inputOutputTensorShape,
3415                                                   const std::vector<float>& inputValues,
3416                                                   const std::vector<float>& expectedOutputValues,
3417                                                   armnn::DataLayout dataLayout)
3418 {
3419     const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32);
3420     const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32);
3421
3422     auto inputTensor = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(inputValues));
3423
3424     LayerTestResult<float, 4> result(outputTensorInfo);
3425     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(expectedOutputValues));
3426
3427     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3428     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3429
3430     armnn::L2NormalizationQueueDescriptor descriptor;
3431     descriptor.m_Parameters.m_DataLayout = dataLayout;
3432     armnn::WorkloadInfo info;
3433
3434     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3435     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3436
3437     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
3438
3439     inputHandle->Allocate();
3440     outputHandle->Allocate();
3441
3442     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
3443
3444     workloadFactory.Finalize();
3445     workload->Execute();
3446
3447     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3448
3449     return result;
3450 }
3451
3452 float CalcInvL2Norm(std::initializer_list<float> elements)
3453 {
3454     const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f,
3455         [](float acc, float element) { return acc + element * element; });
3456     return 1.0f / sqrtf(reduction);
3457 }
3458
3459 } // anonymous namespace
3460
3461 template<typename T>
3462 LayerTestResult<T, 2> Pad2dTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
3463 {
3464     const armnn::TensorShape inputShape{ 3, 3 };
3465     const armnn::TensorShape outputShape{ 7, 7 };
3466
3467     const armnn::TensorInfo inputTensorInfo(inputShape, armnn::GetDataType<T>());
3468     const armnn::TensorInfo outputTensorInfo(outputShape, armnn::GetDataType<T>());
3469
3470     std::vector<T> inputValues(
3471       QuantizedVector<T>(qScale, qOffset,
3472     {
3473         // Height (3) x Width (3)
3474         4, 8, 6,
3475         7, 4, 4,
3476         3, 2, 4
3477     }));
3478
3479     std::vector<T> expectedOutputValues(
3480       QuantizedVector<T>(qScale, qOffset,
3481     {
3482         0, 0, 0, 0, 0, 0, 0,
3483         0, 0, 0, 0, 0, 0, 0,
3484         0, 0, 4, 8, 6, 0, 0,
3485         0, 0, 7, 4, 4, 0, 0,
3486         0, 0, 3, 2, 4, 0, 0,
3487         0, 0, 0, 0, 0, 0, 0,
3488         0, 0, 0, 0, 0, 0, 0
3489     }));
3490
3491     auto inputTensor = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>(inputValues));
3492
3493     LayerTestResult<T, 2> result(outputTensorInfo);
3494     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>(expectedOutputValues));
3495
3496     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3497     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3498
3499     armnn::PadQueueDescriptor descriptor;
3500
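         // Each PadList entry holds the (pad-before, pad-after) amounts for the matching dimension,
         // so padding the 3x3 input by two on every side yields the (2+3+2) x (2+3+2) = 7x7 output.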
3501     std::vector<std::pair<unsigned int, unsigned int>> PadList;
3502     PadList.push_back(std::pair<unsigned int, unsigned int>(2,2));
3503     PadList.push_back(std::pair<unsigned int, unsigned int>(2,2));
3504
3505     descriptor.m_Parameters.m_PadList = PadList;
3506     armnn::WorkloadInfo info;
3507
3508     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3509     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3510
3511     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
3512
3513     inputHandle->Allocate();
3514     outputHandle->Allocate();
3515
3516     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
3517
3518     workloadFactory.Finalize();
3519     workload->Execute();
3520
3521     CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
3522
3523     return result;
3524 }
3525
3526 template <typename T>
3527 LayerTestResult<T, 3> Pad3dTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
3528 {
3529     const armnn::TensorShape inputShape{ 2, 2, 2 };
3530     const armnn::TensorShape outputShape{ 3, 5, 6 };
3531
3532     const armnn::TensorInfo inputTensorInfo(inputShape, armnn::GetDataType<T>());
3533     const armnn::TensorInfo outputTensorInfo(outputShape, armnn::GetDataType<T>());
3534
3535     std::vector<T> inputValues(
3536       QuantizedVector<T>(qScale, qOffset,
3537     {
3538         // Channel 0, Height (2) x Width (2)
3539         0, 4,
3540         2, 5,
3541
3542         // Channel 1, Height (2) x Width (2)
3543         6, 1,
3544         5, 2
3545     }));
3546
3547     std::vector<T> expectedOutputValues(
3548       QuantizedVector<T>(qScale, qOffset,
3549     {
3551         0, 0, 0, 0, 0, 0,
3552         0, 0, 0, 0, 0, 0,
3553         0, 0, 0, 4, 0, 0,
3554         0, 0, 2, 5, 0, 0,
3555         0, 0, 0, 0, 0, 0,
3556
3557         0, 0, 0, 0, 0, 0,
3558         0, 0, 0, 0, 0, 0,
3559         0, 0, 6, 1, 0, 0,
3560         0, 0, 5, 2, 0, 0,
3561         0, 0, 0, 0, 0, 0,
3562
3563         0, 0, 0, 0, 0, 0,
3564         0, 0, 0, 0, 0, 0,
3565         0, 0, 0, 0, 0, 0,
3566         0, 0, 0, 0, 0, 0,
3567         0, 0, 0, 0, 0, 0
3569     }));
3570
3571     auto inputTensor = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>(inputValues));
3572
3573     LayerTestResult<T, 3> result(outputTensorInfo);
3574     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(expectedOutputValues));
3575
3576     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3577     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3578
3579     armnn::PadQueueDescriptor descriptor;
3580
3581     std::vector<std::pair<unsigned int, unsigned int>> PadList;
3582     PadList.push_back(std::pair<unsigned int, unsigned int>(0,1));
3583     PadList.push_back(std::pair<unsigned int, unsigned int>(2,1));
3584     PadList.push_back(std::pair<unsigned int, unsigned int>(2,2));
3585
3586     descriptor.m_Parameters.m_PadList = PadList;
3587     armnn::WorkloadInfo info;
3588
3589     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3590     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3591
3592     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
3593
3594     inputHandle->Allocate();
3595     outputHandle->Allocate();
3596
3597     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
3598
3599     workloadFactory.Finalize();
3600     workload->Execute();
3601
3602     CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
3603
3604     return result;
3605 }
3606
3607 template <typename T>
3608 LayerTestResult<T, 4> Pad4dTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
3609 {
3610     const armnn::TensorShape inputShape{ 2, 2, 3, 2 };
3611     const armnn::TensorShape outputShape{ 4, 5, 7, 4 };
3612
3613     const armnn::TensorInfo inputTensorInfo(inputShape, armnn::GetDataType<T>());
3614     const armnn::TensorInfo outputTensorInfo(outputShape, armnn::GetDataType<T>());
3615
3616     std::vector<T> inputValues(
3617       QuantizedVector<T>(qScale, qOffset,
3618     {
3619         // Batch 0, Channel 0, Height (3) x Width (2)
3620         0, 1,
3621         2, 3,
3622         4, 5,
3623
3624         // Batch 0, Channel 1, Height (3) x Width (2)
3625         6, 7,
3626         8, 9,
3627         10, 11,
3628
3629         // Batch 1, Channel 0, Height (3) x Width (2)
3630         12, 13,
3631         14, 15,
3632         16, 17,
3633
3634         // Batch 1, Channel 1, Height (3) x Width (2)
3635         18, 19,
3636         20, 21,
3637         22, 23
3638     }));
3639
3640     std::vector<T> expectedOutputValues(
3641       QuantizedVector<T>(qScale, qOffset,
3642     {
3643         0, 0, 0, 0,
3644         0, 0, 0, 0,
3645         0, 0, 0, 0,
3646         0, 0, 0, 0,
3647         0, 0, 0, 0,
3648         0, 0, 0, 0,
3649         0, 0, 0, 0,
3650
3651         0, 0, 0, 0,
3652         0, 0, 0, 0,
3653         0, 0, 0, 0,
3654         0, 0, 0, 0,
3655         0, 0, 0, 0,
3656         0, 0, 0, 0,
3657         0, 0, 0, 0,
3658
3659         0, 0, 0, 0,
3660         0, 0, 0, 0,
3661         0, 0, 0, 0,
3662         0, 0, 0, 0,
3663         0, 0, 0, 0,
3664         0, 0, 0, 0,
3665         0, 0, 0, 0,
3666
3667         0, 0, 0, 0,
3668         0, 0, 0, 0,
3669         0, 0, 0, 0,
3670         0, 0, 0, 0,
3671         0, 0, 0, 0,
3672         0, 0, 0, 0,
3673         0, 0, 0, 0,
3674
3675         0, 0, 0, 0,
3676         0, 0, 0, 0,
3677         0, 0, 0, 0,
3678         0, 0, 0, 0,
3679         0, 0, 0, 0,
3680         0, 0, 0, 0,
3681         0, 0, 0, 0,
3682
3683         0, 0, 0, 0,
3684         0, 0, 0, 0,
3685         0, 0, 0, 0,
3686         0, 0, 0, 0,
3687         0, 0, 0, 0,
3688         0, 0, 0, 0,
3689         0, 0, 0, 0,
3690
3691         0, 0, 0, 0,
3692         0, 0, 0, 0,
3693         0, 0, 0, 0,
3694         0, 0, 0, 0,
3695         0, 0, 0, 0,
3696         0, 0, 0, 0,
3697         0, 0, 0, 0,
3698
3699         0, 0, 0, 0,
3700         0, 0, 0, 0,
3701         0, 0, 0, 0,
3702         0, 0, 1, 0,
3703         0, 2, 3, 0,
3704         0, 4, 5, 0,
3705         0, 0, 0, 0,
3706
3707         0, 0, 0, 0,
3708         0, 0, 0, 0,
3709         0, 0, 0, 0,
3710         0, 6, 7, 0,
3711         0, 8, 9, 0,
3712         0, 10, 11, 0,
3713         0, 0, 0, 0,
3714
3715         0, 0, 0, 0,
3716         0, 0, 0, 0,
3717         0, 0, 0, 0,
3718         0, 0, 0, 0,
3719         0, 0, 0, 0,
3720         0, 0, 0, 0,
3721         0, 0, 0, 0,
3722
3723         0, 0, 0, 0,
3724         0, 0, 0, 0,
3725         0, 0, 0, 0,
3726         0, 0, 0, 0,
3727         0, 0, 0, 0,
3728         0, 0, 0, 0,
3729         0, 0, 0, 0,
3730
3731         0, 0, 0, 0,
3732         0, 0, 0, 0,
3733         0, 0, 0, 0,
3734         0, 0, 0, 0,
3735         0, 0, 0, 0,
3736         0, 0, 0, 0,
3737         0, 0, 0, 0,
3738
3739         0, 0, 0, 0,
3740         0, 0, 0, 0,
3741         0, 0, 0, 0,
3742         0, 12, 13, 0,
3743         0, 14, 15, 0,
3744         0, 16, 17, 0,
3745         0, 0, 0, 0,
3746
3747         0, 0, 0, 0,
3748         0, 0, 0, 0,
3749         0, 0, 0, 0,
3750         0, 18, 19, 0,
3751         0, 20, 21, 0,
3752         0, 22, 23, 0,
3753         0, 0, 0, 0,
3754
3755         0, 0, 0, 0,
3756         0, 0, 0, 0,
3757         0, 0, 0, 0,
3758         0, 0, 0, 0,
3759         0, 0, 0, 0,
3760         0, 0, 0, 0,
3761         0, 0, 0, 0,
3762
3763         0, 0, 0, 0,
3764         0, 0, 0, 0,
3765         0, 0, 0, 0,
3766         0, 0, 0, 0,
3767         0, 0, 0, 0,
3768         0, 0, 0, 0,
3769         0, 0, 0, 0,
3770
3771         0, 0, 0, 0,
3772         0, 0, 0, 0,
3773         0, 0, 0, 0,
3774         0, 0, 0, 0,
3775         0, 0, 0, 0,
3776         0, 0, 0, 0,
3777         0, 0, 0, 0,
3778
3779         0, 0, 0, 0,
3780         0, 0, 0, 0,
3781         0, 0, 0, 0,
3782         0, 0, 0, 0,
3783         0, 0, 0, 0,
3784         0, 0, 0, 0,
3785         0, 0, 0, 0,
3786
3787         0, 0, 0, 0,
3788         0, 0, 0, 0,
3789         0, 0, 0, 0,
3790         0, 0, 0, 0,
3791         0, 0, 0, 0,
3792         0, 0, 0, 0,
3793         0, 0, 0, 0,
3794
3795         0, 0, 0, 0,
3796         0, 0, 0, 0,
3797         0, 0, 0, 0,
3798         0, 0, 0, 0,
3799         0, 0, 0, 0,
3800         0, 0, 0, 0,
3801         0, 0, 0, 0
3802     }));
3803
3804     auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(inputValues));
3805
3806     LayerTestResult<T, 4> result(outputTensorInfo);
3807     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(expectedOutputValues));
3808
3809     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3810     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3811
3812     armnn::PadQueueDescriptor descriptor;
3813
3814     std::vector<std::pair<unsigned int, unsigned int>> PadList;
3815     PadList.push_back(std::pair<unsigned int, unsigned int>(1,1));
3816     PadList.push_back(std::pair<unsigned int, unsigned int>(2,1));
3817     PadList.push_back(std::pair<unsigned int, unsigned int>(3,1));
3818     PadList.push_back(std::pair<unsigned int, unsigned int>(1,1));
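         // (pad-before, pad-after) per dimension: the {2, 2, 3, 2} input is padded to
         // {1+2+1, 2+2+1, 3+3+1, 1+2+1} = {4, 5, 7, 4}.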
3819
3820     descriptor.m_Parameters.m_PadList = PadList;
3821     armnn::WorkloadInfo info;
3822
3823     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3824     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3825
3826     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
3827
3828     inputHandle->Allocate();
3829     outputHandle->Allocate();
3830
3831     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
3832
3833     workloadFactory.Finalize();
3834
3835     workload->Execute();
3836
3837     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3838
3839     return result;
3840 }
3841
3842 LayerTestResult<uint8_t, 2> PadUint82dTest(armnn::IWorkloadFactory& workloadFactory)
3843 {
3844     return Pad2dTestCommon<uint8_t>(workloadFactory, 1.0f, 0);
3845 }
3846
3847 LayerTestResult<uint8_t, 3> PadUint83dTest(armnn::IWorkloadFactory& workloadFactory)
3848 {
3849     return Pad3dTestCommon<uint8_t>(workloadFactory, 1.0f, 0);
3850 }
3851
3852 LayerTestResult<uint8_t, 4> PadUint84dTest(armnn::IWorkloadFactory& workloadFactory)
3853 {
3854     return Pad4dTestCommon<uint8_t>(workloadFactory, 1.0f, 0);
3855 }
3856
3857 LayerTestResult<float, 2> PadFloat322dTest(armnn::IWorkloadFactory& workloadFactory)
3858 {
3859     return Pad2dTestCommon<float>(workloadFactory, 0.0f, 0);
3860 }
3861
3862 LayerTestResult<float, 3> PadFloat323dTest(armnn::IWorkloadFactory& workloadFactory)
3863 {
3864     return Pad3dTestCommon<float>(workloadFactory, 0.0f, 0);
3865 }
3866
3867 LayerTestResult<float, 4> PadFloat324dTest(armnn::IWorkloadFactory& workloadFactory)
3868 {
3869     return Pad4dTestCommon<float>(workloadFactory, 0.0f, 0);
3870 }
3871
3872 LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory)
3873 {
3874     // Width: 1
3875     // Height: 1
3876     // Channels: 10
3877     // BatchSize: 1
3878
3879     const armnn::TensorShape inputOutputShape{ 1, 10, 1, 1 };
3880     std::vector<float> inputValues
3881     {
3882         // Batch 0, Channel 0, Height (1) x Width (1)
3883          1.0f,
3884
3885         // Batch 0, Channel 1, Height (1) x Width (1)
3886          2.0f,
3887
3888         // Batch 0, Channel 2, Height (1) x Width (1)
3889          3.0f,
3890
3891         // Batch 0, Channel 3, Height (1) x Width (1)
3892          4.0f,
3893
3894         // Batch 0, Channel 4, Height (1) x Width (1)
3895          5.0f,
3896
3897         // Batch 0, Channel 5, Height (1) x Width (1)
3898          6.0f,
3899
3900         // Batch 0, Channel 6, Height (1) x Width (1)
3901          7.0f,
3902
3903         // Batch 0, Channel 7, Height (1) x Width (1)
3904          8.0f,
3905
3906         // Batch 0, Channel 8, Height (1) x Width (1)
3907          9.0f,
3908
3909         // Batch 0, Channel 9, Height (1) x Width (1)
3910         10.0f
3911     };
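         // 1 / sqrt(1^2 + 2^2 + ... + 10^2) = 1 / sqrt(385)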
3912     const float approxInvL2Norm = 0.050964719f;
3913     std::vector<float> expectedOutputValues
3914     {
3915         // Batch 0, Channel 0, Height (1) x Width (1)
3916          1.0f * approxInvL2Norm,
3917          2.0f * approxInvL2Norm,
3918          3.0f * approxInvL2Norm,
3919          4.0f * approxInvL2Norm,
3920          5.0f * approxInvL2Norm,
3921          6.0f * approxInvL2Norm,
3922          7.0f * approxInvL2Norm,
3923          8.0f * approxInvL2Norm,
3924          9.0f * approxInvL2Norm,
3925         10.0f * approxInvL2Norm
3926     };
3927
3928     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
3929                                    inputValues, expectedOutputValues, armnn::DataLayout::NCHW);
3930 }
3931
3932 LayerTestResult<float, 4> L2Normalization1dNhwcTest(armnn::IWorkloadFactory& workloadFactory)
3933 {
3934     // Width: 1
3935     // Height: 1
3936     // Channels: 10
3937     // BatchSize: 1
3938
3939     const armnn::TensorShape inputOutputShape{ 1, 1, 1, 10 };
3940     std::vector<float> inputValues
3941     {
3942         // Batch 0, Height 0, Width (1) x Channel (10)
3943         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f
3944     };
3945     const float approxInvL2Norm = 0.050964719f;
3946     std::vector<float> expectedOutputValues
3947     {
3948         // Batch 0, Height 0, Width (1) x Channel (10)
3949          1.0f * approxInvL2Norm,
3950          2.0f * approxInvL2Norm,
3951          3.0f * approxInvL2Norm,
3952          4.0f * approxInvL2Norm,
3953          5.0f * approxInvL2Norm,
3954          6.0f * approxInvL2Norm,
3955          7.0f * approxInvL2Norm,
3956          8.0f * approxInvL2Norm,
3957          9.0f * approxInvL2Norm,
3958         10.0f * approxInvL2Norm
3959     };
3960
3961     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
3962                                    inputValues, expectedOutputValues, armnn::DataLayout::NHWC);
3963 }
3964
3965 LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory)
3966 {
3967     // Width: 5
3968     // Height: 1
3969     // Channels: 2
3970     // BatchSize: 1
3971
3972     const armnn::TensorShape inputOutputShape{ 1, 2, 1, 5 };
3973     std::vector<float> inputValues
3974     {
3975         // Batch 0, Channel 0, Height (1) x Width (5)
3976         1.0f, 3.0f, 5.0f, 7.0f,  9.0f,
3977
3978         // Batch 0, Channel 1, Height (1) x Width (5)
3979         2.0f, 4.0f, 6.0f, 8.0f, 10.0f
3980     };
3981     std::vector<float> expectedOutputValues
3982     {
3983         // Batch 0, Channel 0, Height (1) x Width (5)
3984          1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
3985          3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
3986          5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
3987          7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
3988          9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
3989
3990         // Batch 0, Channel 1, Height (1) x Width (5)
3991          2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
3992          4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
3993          6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
3994          8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
3995         10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
3996     };
3997
3998     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
3999                                    inputValues, expectedOutputValues, armnn::DataLayout::NCHW);
4000 }
4001
4002 LayerTestResult<float, 4> L2Normalization2dNhwcTest(armnn::IWorkloadFactory& workloadFactory)
4003 {
4004     // Width: 5
4005     // Height: 1
4006     // Channels: 2
4007     // BatchSize: 1
4008
4009     const armnn::TensorShape inputOutputShape{ 1, 1, 5, 2 };
4010     std::vector<float> inputValues
4011     {
4012         // Batch 0, Height 0, Width (5) x Channel (2)
4013         1.0f,  2.0f,
4014         3.0f,  4.0f,
4015         5.0f,  6.0f,
4016         7.0f,  8.0f,
4017         9.0f, 10.0f
4018     };
4019     std::vector<float> expectedOutputValues
4020     {
4021         // Batch 0, Height 0, Width (5) x Channel (2)
4022         1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
4023         2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
4024         3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
4025         4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
4026         5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
4027         6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
4028         7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
4029         8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
4030         9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
4031        10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
4032     };
4033
4034     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
4035                                    inputValues, expectedOutputValues, armnn::DataLayout::NHWC);
4036 }
4037
4038 LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory)
4039 {
4040     // Width: 3
4041     // Height: 4
4042     // Channels: 2
4043     // BatchSize: 1
4044
4045     const armnn::TensorShape inputOutputShape{ 1, 2, 4, 3 };
4046     std::vector<float> inputValues
4047     {
4048         // Batch 0, Channel 0, Height (4) x Width (3)
4049         119.0f,  21.0f, 150.0f,
4050         149.0f,  32.0f, 179.0f,
4051          15.0f, 227.0f, 141.0f,
4052         147.0f, 199.0f, 220.0f,
4053
4054         // Batch 0, Channel 1, Height (4) x Width (3)
4055         110.0f, 140.0f,  73.0f,
4056         211.0f, 212.0f,  89.0f,
4057          24.0f, 138.0f, 188.0f,
4058         162.0f,  12.0f, 161.0f
4059     };
4060     std::vector<float> expectedOutputValues
4061     {
4062         // Batch 0, Channel 0, Height (4) x Width (3)
4063         119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
4064          21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
4065         150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
4066         149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
4067          32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
4068         179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
4069          15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
4070         227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
4071         141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
4072         147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
4073         199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
4074         220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
4075
4076         // Batch 0, Channel 1, Height (4) x Width (3)
4077         110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
4078         140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
4079          73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
4080         211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
4081         212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
4082          89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
4083          24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
4084         138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
4085         188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
4086         162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
4087          12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
4088         161.0f * CalcInvL2Norm({ 220.0f, 161.0f })
4089     };
4090
4091     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
4092                                    inputValues, expectedOutputValues, armnn::DataLayout::NCHW);
4093 }
4094
4095 LayerTestResult<float, 4> L2Normalization3dNhwcTest(armnn::IWorkloadFactory& workloadFactory)
4096 {
4097     // Width: 3
4098     // Height: 4
4099     // Channels: 2
4100     // BatchSize: 1
4101
4102     const armnn::TensorShape inputOutputShape{ 1, 4, 3, 2 };
4103     std::vector<float> inputValues
4104     {
4105         // Batch 0, Height 0, Width (3) x Channel (2)
4106         119.0f, 110.0f,
4107          21.0f, 140.0f,
4108         150.0f,  73.0f,
4109
4110         // Batch 0, Height 1, Width (3) x Channel (2)
4111         149.0f, 211.0f,
4112          32.0f, 212.0f,
4113         179.0f,  89.0f,
4114
4115         // Batch 0, Height 2, Width (3) x Channel (2)
4116          15.0f,  24.0f,
4117         227.0f, 138.0f,
4118         141.0f, 188.0f,
4119
4120         // Batch 0, Height 3, Width (3) x Channel (2)
4121         147.0f, 162.0f,
4122         199.0f,  12.0f,
4123         220.0f, 161.0f
4124     };
4125     std::vector<float> expectedOutputValues
4126     {
4127         // Batch 0, Height 0, Width (3) x Channel (2)
4128         119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
4129         110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
4130          21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
4131         140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
4132         150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
4133          73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
4134
4135         // Batch 0, Height 1, Width (3) x Channel (2)
4136         149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
4137         211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
4138          32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
4139         212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
4140         179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
4141          89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
4142
4143         // Batch 0, Height 2, Width (3) x Channel (2)
4144          15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
4145          24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
4146         227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
4147         138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
4148         141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
4149         188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
4150
4151         // Batch 0, Height 3, Width (3) x Channel (2)
4152         147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
4153         162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
4154         199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
4155          12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
4156         220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
4157         161.0f * CalcInvL2Norm({ 220.0f, 161.0f })
4158     };
4159
4160     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
4161                                    inputValues, expectedOutputValues, armnn::DataLayout::NHWC);
4162 }
4163
4164 LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory)
4165 {
4166     // Width: 3
4167     // Height: 4
4168     // Channels: 3
4169     // BatchSize: 2
4170
4171     const armnn::TensorShape inputOutputShape{ 2, 3, 4, 3 };
4172     std::vector<float> inputValues
4173     {
4174         // Batch 0, Channel 0, Height (4) x Width (3)
4175         235.0f,  46.0f, 178.0f,
4176         100.0f, 123.0f,  19.0f,
4177         172.0f,  74.0f, 250.0f,
4178           6.0f, 195.0f,  80.0f,
4179
4180         // Batch 0, Channel 1, Height (4) x Width (3)
4181         113.0f,  95.0f, 202.0f,
4182          77.0f, 114.0f,  71.0f,
4183         122.0f, 246.0f, 166.0f,
4184          82.0f,  28.0f,  37.0f,
4185
4186         // Batch 0, Channel 2, Height (4) x Width (3)
4187          56.0f, 170.0f, 162.0f,
4188         194.0f,  89.0f, 254.0f,
4189          12.0f, 209.0f, 200.0f,
4190           1.0f,  64.0f,  54.0f,
4191
4192         // Batch 1, Channel 0, Height (4) x Width (3)
4193          67.0f,  90.0f,  49.0f,
4194           7.0f, 163.0f,  18.0f,
4195          25.0f, 117.0f, 103.0f,
4196         247.0f,  59.0f, 189.0f,
4197
4198         // Batch 1, Channel 1, Height (4) x Width (3)
4199         239.0f, 104.0f, 199.0f,
4200          17.0f, 124.0f, 153.0f,
4201         222.0f, 217.0f,  75.0f,
4202          32.0f, 126.0f,  21.0f,
4203
4204         // Batch 1, Channel 2, Height (4) x Width (3)
4205          97.0f, 145.0f, 215.0f,
4206         115.0f, 116.0f, 238.0f,
4207         226.0f,  16.0f, 132.0f,
4208          92.0f, 125.0f,  88.0f
4209     };
4210     std::vector<float> expectedOutputValues
4211     {
4212         // Batch 0, Channel 0, Height (4) x Width (3)
4213         235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
4214          46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
4215         178.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
4216         100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
4217         123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
4218          19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
4219         172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
4220          74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
4221         250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
4222           6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
4223         195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
4224          80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
4225
4226         // Batch 0, Channel 1, Height (4) x Width (3)
4227         113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
4228          95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
4229         202.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
4230          77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
4231         114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
4232          71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
4233         122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
4234         246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
4235         166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
4236          82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
4237          28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
4238          37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
4239
4240         // Batch 0, Channel 2, Height (4) x Width (3)
4241          56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
4242         170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
4243         162.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
4244         194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
4245          89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
4246         254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
4247          12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
4248         209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
4249         200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
4250           1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
4251          64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
4252          54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
4253
4254         // Batch 1, Channel 0, Height (4) x Width (3)
4255          67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
4256          90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
4257          49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
4258           7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
4259         163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
4260          18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
4261          25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
4262         117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
4263         103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
4264         247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
4265          59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
4266         189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
4267
4268         // Batch 1, Channel 1, Height (4) x Width (3)
4269         239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
4270         104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
4271         199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
4272          17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
4273         124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
4274         153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
4275         222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
4276         217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
4277          75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
4278          32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
4279         126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
4280          21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
4281
4282         // Batch 1, Channel 2, Height (4) x Width (3)
4283          97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
4284         145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
4285         215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
4286         115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
4287         116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
4288         238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
4289         226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
4290          16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
4291         132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
4292          92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
4293         125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
4294          88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f })
4295     };
4296
4297     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
4298                                    inputValues, expectedOutputValues, armnn::DataLayout::NCHW);
4299 }
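
// For reference, every expected value above is the corresponding input element scaled by the
// inverse L2 norm taken across the channel dimension at the same batch and spatial position.
// A minimal sketch of what CalcInvL2Norm is assumed to compute (the real helper is defined
// earlier in this file; CalcInvL2NormSketch is a hypothetical name used for illustration only):
//
//     float CalcInvL2NormSketch(std::initializer_list<float> elements)
//     {
//         float sumOfSquares = 0.0f;
//         for (float element : elements)
//         {
//             sumOfSquares += element * element;
//         }
//         return 1.0f / std::sqrt(sumOfSquares); // needs <cmath> and <initializer_list>
//     }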
4300
4301 LayerTestResult<float, 4> L2Normalization4dNhwcTest(armnn::IWorkloadFactory& workloadFactory)
4302 {
4303     // BatchSize: 2
4304     // Height: 4
4305     // Width: 3
4306     // Channels: 3
4307
4308     const armnn::TensorShape inputOutputShape{ 2, 4, 3, 3 };
4309     std::vector<float> inputValues
4310     {
4311         // Batch 0, Height 0, Width (3) x Channel (3)
4312         235.0f, 113.0f,  56.0f,
4313          46.0f,  95.0f, 170.0f,
4314         178.0f, 202.0f, 162.0f,
4315
4316         // Batch 0, Height 1, Width (3) x Channel (3)
4317         100.0f,  77.0f, 194.0f,
4318         123.0f, 114.0f,  89.0f,
4319          19.0f,  71.0f, 254.0f,
4320
4321         // Batch 0, Height 2, Width (3) x Channel (3)
4322         172.0f, 122.0f,  12.0f,
4323          74.0f, 246.0f, 209.0f,
4324         250.0f, 166.0f, 200.0f,
4325
4326         // Batch 0, Height 3, Width (3) x Channel (3)
4327           6.0f,  82.0f,   1.0f,
4328         195.0f,  28.0f,  64.0f,
4329          80.0f,  37.0f,  54.0f,
4330
4331         // Batch 1, Height 0, Width (3) x Channel (3)
4332          67.0f, 239.0f,  97.0f,
4333          90.0f, 104.0f, 145.0f,
4334          49.0f, 199.0f, 215.0f,
4335
4336         // Batch 1, Height 1, Width (3) x Channel (3)
4337           7.0f,  17.0f, 115.0f,
4338         163.0f, 124.0f, 116.0f,
4339          18.0f, 153.0f, 238.0f,
4340
4341         // Batch 1, Height 2, Width (3) x Channel (3)
4342          25.0f, 222.0f, 226.0f,
4343         117.0f, 217.0f,  16.0f,
4344         103.0f,  75.0f, 132.0f,
4345
4346         // Batch 1, Height 3, Width (3) x Channel (3)
4347         247.0f,  32.0f,  92.0f,
4348          59.0f, 126.0f, 125.0f,
4349         189.0f,  21.0f,  88.0f
4350     };
4351     std::vector<float> expectedOutputValues
4352     {
4353         // Batch 0, Height 0, Width (3) x Channel (3)
4354         235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
4355         113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
4356          56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
4357          46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
4358          95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
4359         170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
4360         178.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
4361         202.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
4362         162.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
4363
4364         // Batch 0, Height 1, Width (3) x Channel (3)
4365         100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
4366          77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
4367         194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
4368         123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
4369         114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
4370          89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
4371          19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
4372          71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
4373         254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
4374
4375         // Batch 0, Height 2, Width (3) x Channel (3)
4376         172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
4377         122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
4378          12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
4379          74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
4380         246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
4381         209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
4382         250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
4383         166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
4384         200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
4385
4386         // Batch 0, Height 3, Width (3) x Channel (3)
4387           6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
4388          82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
4389           1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
4390         195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
4391          28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
4392          64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
4393          80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
4394          37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
4395          54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
4396
4397         // Batch 1, Height 0, Width (3) x Channel (3)
4398          67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
4399         239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
4400          97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
4401          90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
4402         104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
4403         145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
4404          49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
4405         199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
4406         215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
4407
4408         // Batch 1, Height 1, Width (3) x Channel (3)
4409           7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
4410          17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
4411         115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
4412         163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
4413         124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
4414         116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
4415          18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
4416         153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
4417         238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
4418
4419         // Batch 1, Height 2, Width (3) x Channel (3)
4420          25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
4421         222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
4422         226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
4423         117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
4424         217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
4425          16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
4426         103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
4427          75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
4428         132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
4429
4430         // Batch 1, Height 3, Width (3) x Channel (3)
4431         247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
4432          32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
4433          92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
4434          59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
4435         126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
4436         125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
4437         189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
4438          21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
4439          88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f })
4440     };
4441
4442     return L2NormalizationTestImpl(workloadFactory, inputOutputShape,
4443                                    inputValues, expectedOutputValues, armnn::DataLayout::NHWC);
4444 }
4445
4446 template <typename T>
4447 LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory,
4448     float qScale,
4449     int32_t qOffset)
4450 {
4451     constexpr unsigned int inputWidth = 3;
4452     constexpr unsigned int inputHeight = 4;
4453     constexpr unsigned int inputChannels = 3;
4454     constexpr unsigned int inputBatchSize = 2;
4455
4456     constexpr unsigned int outputWidth = inputWidth;
4457     constexpr unsigned int outputHeight = inputHeight;
4458     constexpr unsigned int outputChannels = inputChannels;
4459     constexpr unsigned int outputBatchSize = inputBatchSize;
4460
4461     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
4462         armnn::GetDataType<T>());
4463
4464     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
4465         armnn::GetDataType<T>());
4466
4467     // Set quantization parameters if the requested type is a quantized type.
4468     if(armnn::IsQuantizedType<T>())
4469     {
4470         inputTensorInfo.SetQuantizationScale(qScale);
4471         inputTensorInfo.SetQuantizationOffset(qOffset);
4472         outputTensorInfo.SetQuantizationScale(qScale);
4473         outputTensorInfo.SetQuantizationOffset(qOffset);
4474     }
4475
4476     auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
4477         QuantizedVector<T>(qScale, qOffset, {
4478         // Batch 0, Channel 0
4479         235.0f,  46.0f, 178.0f,
4480         100.0f, 123.0f,  19.0f,
4481         172.0f,  74.0f, 250.0f,
4482           6.0f, 195.0f,  80.0f,
4483
4484         // Batch 0, Channel 1
4485         113.0f,  95.0f, 202.0f,
4486          77.0f, 114.0f,  71.0f,
4487         122.0f, 246.0f, 166.0f,
4488          82.0f,  28.0f,  37.0f,
4489
4490         // Batch 0, Channel 2
4491          56.0f, 170.0f, 162.0f,
4492         194.0f,  89.0f, 254.0f,
4493          12.0f, 209.0f, 200.0f,
4494           1.0f,  64.0f,  54.0f,
4495
4496         // Batch 1, Channel 0
4497          67.0f,  90.0f,  49.0f,
4498           7.0f, 163.0f,  18.0f,
4499          25.0f, 117.0f, 103.0f,
4500         247.0f,  59.0f, 189.0f,
4501
4502         // Batch 1, Channel 1
4503         239.0f, 104.0f, 199.0f,
4504          17.0f, 124.0f, 153.0f,
4505         222.0f, 217.0f,  75.0f,
4506          32.0f, 126.0f,  21.0f,
4507
4508         // Batch 1, Channel 2
4509          97.0f, 145.0f, 215.0f,
4510         115.0f, 116.0f, 238.0f,
4511         226.0f,  16.0f, 132.0f,
4512          92.0f, 125.0f,  88.0f,
4513     })));
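    // Note: QuantizedVector is assumed to map each float v to round(v / qScale) + qOffset,
    // clamped to the representable range, when T is a quantized type, and to pass the values
    // through unchanged when T is float. For example, with qScale = 1.0f and qOffset = 0
    // (see ConstantTestUint8 below), 235.0f simply becomes 235.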
4514
4515     LayerTestResult<T, 4> result(outputTensorInfo);
4516     result.outputExpected = input;
4517
4518     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
4519
4520     armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo);
4521     AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]);
4522
4523     armnn::ConstantQueueDescriptor descriptor;
4524     descriptor.m_LayerOutput = &constantTensor;
4525
4526     armnn::WorkloadInfo info;
4527     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
4528
4529     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info);
4530
4531     outputHandle->Allocate();
4532
4533     workloadFactory.Finalize();
4534     workload->Execute();
4535
4536     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
4537     return result;
4538 }
4539
4540 LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory)
4541 {
4542     return ConstantTestImpl<float>(workloadFactory, 0.0f, 0);
4543 }
4544
4545 LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory)
4546 {
4547     return ConstantTestImpl<uint8_t>(workloadFactory, 1.0f, 0);
4548 }
4549
4550 LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory)
4551 {
4552     unsigned int outputWidth = 3;
4553     unsigned int outputHeight = 6;
4554     unsigned int outputChannels = 3;
4555
4556     unsigned int inputWidth1 = 3;
4557     unsigned int inputHeight1 = 6;
4558     unsigned int inputChannels1 = 2;
4559
4560     unsigned int inputWidth2 = 3;
4561     unsigned int inputHeight2 = 6;
4562     unsigned int inputChannels2 = 1;
4563
4564     // Defines the tensor descriptors.
4565     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
4566     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
4567     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
4568
4569     // Arbitrary scale and offsets. They don't really matter because the merger operator doesn't dequantize or requantize the data.
4570     const float scale = 0.13497836f;
4571     const int32_t offset = -7;
4572
4573     outputTensorInfo.SetQuantizationScale(scale);
4574     outputTensorInfo.SetQuantizationOffset(offset);
4575     inputTensorInfo1.SetQuantizationScale(scale);
4576     inputTensorInfo1.SetQuantizationOffset(offset);
4577     inputTensorInfo2.SetQuantizationScale(scale);
4578     inputTensorInfo2.SetQuantizationOffset(offset);
4579
4580     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
4581
4582     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
4583         {
4584             1, 2, 3,
4585             4, 5, 6,
4586             7, 8, 9,
4587             10, 11, 12,
4588             13, 14, 15,
4589             16, 17, 18,
4590
4591             19, 20, 21,
4592             22, 23, 24,
4593             25, 26, 27,
4594             28, 29, 30,
4595             31, 32, 33,
4596             34, 35, 36,
4597
4598             37, 38, 39,
4599             40, 41, 42,
4600             43, 44, 45,
4601             46, 47, 48,
4602             49, 50, 51,
4603             52, 53, 54,
4604         })
4605     );
4606
4607     auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
4608     {
4609         1, 2, 3,
4610         4, 5, 6,
4611         7, 8, 9,
4612         10, 11, 12,
4613         13, 14, 15,
4614         16, 17, 18,
4615
4616         19, 20, 21,
4617         22, 23, 24,
4618         25, 26, 27,
4619         28, 29, 30,
4620         31, 32, 33,
4621         34, 35, 36,
4622     })
4623     );
4624
4625     auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
4626     {
4627         37, 38, 39,
4628         40, 41, 42,
4629         43, 44, 45,
4630         46, 47, 48,
4631         49, 50, 51,
4632         52, 53, 54,
4633     })
4634     );
4635
4636     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by the size of input[0].
4637     armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);
4638
4639     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by the size of input[1].
4640     armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
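    // Together these origins concatenate the two inputs along the channel axis: input1's two
    // channels are written starting at channel offset 0 and input2's single channel at channel
    // offset 2, producing the 3-channel expected output defined above.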
4641
4642
4643     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
4644
4645     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
4646
4647     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
4648         subTensorsSupported ?
4649             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
4650             workloadFactory.CreateTensorHandle(inputTensorInfo1);
4651
4652     std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
4653         subTensorsSupported ?
4654             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
4655             workloadFactory.CreateTensorHandle(inputTensorInfo2);
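    // When sub-tensors are supported, the input handles above are views straight into the output
    // tensor at the view origins, so copying the input data into them already places it in the
    // right region of the output and the merger workload may not need to copy anything itself;
    // otherwise standalone input tensors are created and the workload performs the copies.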
4656
4657
4658     armnn::MergerQueueDescriptor data;
4659     armnn::WorkloadInfo info;
4660     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
4661     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
4662     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
4663
4664     data.m_ViewOrigins.push_back(window1);
4665     data.m_ViewOrigins.push_back(window2);
4666
4667     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);
4668
4669     inputHandle1->Allocate();
4670     inputHandle2->Allocate();
4671     outputHandle->Allocate();
4672
4673     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
4674     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
4675
4676     workloadFactory.Finalize();
4677     workload->Execute();
4678
4679     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
4680
4681     return ret;
4682 }
4683
4684 LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory)
4685 {
4686     unsigned int batchSize = 1;
4687     unsigned int channels = 2;
4688     unsigned int height = 2;
4689     unsigned int width = 3;
4690
4691     const float scale = 7.0f;
4692     const int32_t offset = 3;
4693
4694     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
4695     armnn::TensorInfo outputTensorInfo;
4696
4697     const unsigned int shape[] = { batchSize, channels, height, width };
4698     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
4699     inputTensorInfo1.SetQuantizationScale(scale);
4700     inputTensorInfo1.SetQuantizationOffset(offset);
4701
4702     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
4703     inputTensorInfo2.SetQuantizationScale(scale);
4704     inputTensorInfo2.SetQuantizationOffset(offset);
4705
4706     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
4707     outputTensorInfo.SetQuantizationScale(scale);
4708     outputTensorInfo.SetQuantizationOffset(offset);
4709
4710     // See dequantized values to the right.
4711     auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
4712     {
4713          63,  35,  77,  70,  56, 112, //  420, 224,  518,  469,  371, 763
4714         203,  28, 252, 168, 245,  91  // 1400, 175, 1743, 1155, 1694, 616
4715     }));
4716
4717     // See dequantized values to the right.
4718     auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
4719     {
4720          21,   7, 175, 231, 175, 210, // 126,   28, 1204, 1596, 1204, 1449
4721         126, 161,  63,  21, 105, 126  // 861, 1106,  420,  126,  714,  861
4722     }));
4723
4724     // See dequantized values to the right.
4725     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
4726     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
4727     {
4728          81,  39, 249, 255, 228, 255, //  546,  252, 1722, 2065(clamped), 1575, 2212(clamped)
4729         255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477
4730     }));
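    // Worked example for the first element, using dequantized = scale * (quantized - offset):
    // 63 dequantizes to 7.0f * (63 - 3) = 420 and 21 to 7.0f * (21 - 3) = 126; the sum is 546,
    // which requantizes to 546 / 7.0f + 3 = 81. Sums whose requantized value exceeds 255
    // saturate, hence the clamped 255 entries above.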
4731
4732     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
4733     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
4734     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
4735
4736     armnn::AdditionQueueDescriptor data;
4737     armnn::WorkloadInfo info;
4738     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
4739     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
4740     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
4741
4742     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
4743
4744     inputHandle1->Allocate();
4745     inputHandle2->Allocate();
4746     outputHandle->Allocate();
4747
4748     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
4749     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
4750
4751     workloadFactory.Finalize();
4752     workload->Execute();
4753
4754     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
4755
4756     return result;
4757 }
4758
4759 namespace
4760 {
4761 LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory,
4762                                                           const unsigned int shape0[4],
4763                                                           const std::vector<uint8_t> & values0,
4764                                                           float scale0,
4765                                                           int32_t offset0,
4766                                                           const unsigned int shape1[4],
4767                                                           const std::vector<uint8_t> & values1,
4768                                                           float scale1,
4769                                                           int32_t offset1,
4770                                                           const unsigned int outShape[4],
4771                                                           const std::vector<uint8_t> & outValues,
4772                                                           float outScale,
4773                                                           int32_t outOffset)
4774 {
4775     armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8);
4776     armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8);
4777     armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8);
4778
4779     inputTensorInfo0.SetQuantizationScale(scale0);
4780     inputTensorInfo0.SetQuantizationOffset(offset0);
4781
4782     inputTensorInfo1.SetQuantizationScale(scale1);
4783     inputTensorInfo1.SetQuantizationOffset(offset1);
4784
4785     outputTensorInfo.SetQuantizationScale(outScale);
4786     outputTensorInfo.SetQuantizationOffset(outOffset);
4787
4788     auto input0 = MakeTensor<uint8_t, 4>(inputTensorInfo0, values0);
4789     auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, values1);
4790
4791     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
4792     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, outValues);
4793
4794     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
4795     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
4796     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
4797
4798     armnn::MultiplicationQueueDescriptor data;
4799     armnn::WorkloadInfo info;
4800     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
4801     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
4802     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
4803
4804     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
4805
4806     inputHandle0->Allocate();
4807     inputHandle1->Allocate();
4808     outputHandle->Allocate();
4809
4810     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
4811     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
4812
4813     workloadFactory.Finalize();
4814     workload->Execute();
4815
4816     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
4817
4818     return result;
4819 }
4820 } // anonymous namespace
4821
4822 LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
4823 {
4824     unsigned int batchSize = 1;
4825     unsigned int channels = 2;
4826     unsigned int height = 2;
4827     unsigned int width = 3;
4828     const unsigned int shape[] = { batchSize, channels, height, width };
4829
4830     // See dequantized values to the right.
4831     std::vector<uint8_t> input0({
4832          62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
4833         188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
4834     });
4835
4836     // See dequantized values to the right.
4837     std::vector<uint8_t> input1({
4838         126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
4839          48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
4840     });
4841
4842     // See dequantized values to the right.
4843     std::vector<uint8_t> output(
4844     {
4845          64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
4846          77,  15,  92,  16,  10,  21, // 112200,  26676,        132192,           29160, 21120,  35640
4847     });
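    // Worked example for the first element, with the scales and offsets passed below:
    // 4.0f * (62 - 1) = 244 multiplied by 3.0f * (126 - (-2)) = 384 gives 93696, which
    // requantizes to round(93696 / 1366.255f) + (-5) = 64; products that requantize above
    // 255 are clamped.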
4848
4849     return MultiplicationUint8TestHelper(workloadFactory,
4850                                          shape,
4851                                          input0,
4852                                          4.0f,
4853                                          1,
4854                                          shape,
4855                                          input1,
4856                                          3.0f,
4857                                          -2,
4858                                          shape,
4859                                          output,
4860                                          1366.255f, // Scale/offset chosen to have output values out of range.
4861                                          -5);
4862 }
4863
4864 LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
4865 {
4866     const unsigned int shape0[] = { 1, 2, 2, 3 };
4867     const unsigned int shape1[] = { 1, 1, 1, 1 };
4868
4869     std::vector<uint8_t> input0({
4870         1, 2, 3,    4,  5,  6,
4871         7, 8, 9,   10, 11, 12
4872     });
4873
4874     std::vector<uint8_t> input1({2});
4875
4876     std::vector<uint8_t> output({
4877         2,  4,   6,     8, 10, 12,
4878         14, 16, 18,    20, 22, 24
4879     });
4880
4881     return MultiplicationUint8TestHelper(workloadFactory,
4882                                          shape0,
4883                                          input0,
4884                                          1.0f,
4885                                          0,
4886                                          shape1,
4887                                          input1,
4888                                          1.0f,
4889                                          0,
4890                                          shape0,
4891                                          output,
4892                                          1.0f,
4893                                          0);
4894 }
4895
4896 LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory)
4897 {
4898     const unsigned int shape0[] = { 1, 2, 2, 3 };
4899     const unsigned int shape1[] = { 1, 1, 1, 3 };
4900
4901     std::vector<uint8_t> input0({
4902         1, 2, 3,    4,  5,  6,
4903         7, 8, 9,   10, 11, 12
4904     });
4905
4906     std::vector<uint8_t> input1({1, 2, 3});
4907
4908     std::vector<uint8_t> output({
4909         1,  4,   9,     4, 10, 18,
4910         7, 16,  27,    10, 22, 36
4911     });
4912
4913     return MultiplicationUint8TestHelper(workloadFactory,
4914                                          shape0,
4915                                          input0,
4916                                          1.0f,
4917                                          0,
4918                                          shape1,
4919                                          input1,
4920                                          1.0f,
4921                                          0,
4922                                          shape0,
4923                                          output,
4924                                          1.0f,
4925                                          0);
4926 }
4927
4928 namespace
4929 {
4930 template <typename T>
4931 LayerTestResult<T, 4> SubtractionTestHelper(armnn::IWorkloadFactory& workloadFactory,
4932                                             const unsigned int shape0[4],
4933                                             const std::vector<T>& values0,
4934                                             float scale0,
4935                                             int32_t offset0,
4936                                             const unsigned int shape1[4],
4937                                             const std::vector<T>& values1,
4938                                             float scale1,
4939                                             int32_t offset1,
4940                                             const unsigned int outShape[4],
4941                                             const std::vector<T>& outValues,
4942                                             float outScale,
4943                                             int32_t outOffset)
4944 {
4945     auto dataType = (std::is_same<T, uint8_t>::value ?
4946                      armnn::DataType::QuantisedAsymm8 :
4947                      armnn::DataType::Float32);
4948
4949     armnn::TensorInfo inputTensorInfo0(4, shape0, dataType);
4950     armnn::TensorInfo inputTensorInfo1(4, shape1, dataType);
4951     armnn::TensorInfo outputTensorInfo(4, outShape, dataType);
4952
4953     inputTensorInfo0.SetQuantizationScale(scale0);
4954     inputTensorInfo0.SetQuantizationOffset(offset0);
4955
4956     inputTensorInfo1.SetQuantizationScale(scale1);
4957     inputTensorInfo1.SetQuantizationOffset(offset1);
4958
4959     outputTensorInfo.SetQuantizationScale(outScale);
4960     outputTensorInfo.SetQuantizationOffset(outOffset);
4961
4962     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0);
4963     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1);
4964
4965     LayerTestResult<T, 4> result(outputTensorInfo);
4966     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues);
4967
4968     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
4969     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
4970     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
4971
4972     armnn::SubtractionQueueDescriptor data;
4973     armnn::WorkloadInfo info;
4974     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
4975     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
4976     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
4977
4978     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSubtraction(data, info);
4979
4980     inputHandle0->Allocate();
4981     inputHandle1->Allocate();
4982     outputHandle->Allocate();
4983
4984     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
4985     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
4986
4987     workloadFactory.Finalize();
4988     workload->Execute();
4989
4990     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
4991
4992     return result;
4993 }
4994 } // anonymous namespace
4995
4996 LayerTestResult<uint8_t, 4> SubtractionUint8Test(armnn::IWorkloadFactory& workloadFactory)
4997 {
4998     const unsigned int shape0[] = { 1, 1, 2, 2 };
4999     const unsigned int shape1[] = { 1, 1, 2, 2 };
5000
5001     std::vector<uint8_t> input0({ 10, 12, 14, 16 });
5002     std::vector<uint8_t> input1({ 1, 2, 1, 2 });
5003     std::vector<uint8_t> output({ 3, 3, 5, 5 });
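    // For example, the first element: 0.5f * (10 - 2) = 4 minus 1.0f * (1 - 0) = 1 gives 3,
    // which the output's scale of 1.0f and offset of 0 requantize to 3.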
5004
5005     return SubtractionTestHelper(workloadFactory,
5006                                  shape0, input0, 0.5f, 2,
5007                                  shape1, input1, 1.0f, 0,
5008                                  shape0, output, 1.0f, 0);
5009 }
5010
5011 LayerTestResult<uint8_t, 4> SubtractionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
5012 {
5013     const unsigned int shape0[] = { 1, 1, 2, 2 };
5014     const unsigned int shape1[] = { 1, 1, 1, 1 };
5015
5016     std::vector<uint8_t> input0({ 10, 12, 14, 16 });
5017     std::vector<uint8_t> input1({ 2 });
5018     std::vector<uint8_t> output({ 5, 6, 7, 8 });
5019
5020     return SubtractionTestHelper(workloadFactory,
5021                                  shape0, input0, 0.5f, 2,
5022                                  shape1, input1, 1.0f, 0,
5023                                  shape0, output, 1.0f, 3);
5024 }
5025
5026 LayerTestResult<uint8_t, 4> SubtractionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory)
5027 {
5028     const unsigned int shape0[] = { 1, 1, 2, 2 };
5029     const unsigned int shape1[] = { 1, 1, 1, 2 }; // Broadcast along width, consistent with the expected output and the float SubtractionBroadcastTest below.
5030
5031     std::vector<uint8_t> input0({ 10, 12, 14, 16 });
5032     std::vector<uint8_t> input1({ 2, 1 });
5033     std::vector<uint8_t> output({ 8, 11, 12, 15 });
5034
5035     return SubtractionTestHelper(workloadFactory,
5036                                  shape0, input0, 1.0f, 0,
5037                                  shape1, input1, 1.0f, 0,
5038                                  shape0, output, 1.0f, 0);
5039 }
5040
5041 LayerTestResult<float, 4> SubtractionTest(armnn::IWorkloadFactory& workloadFactory)
5042 {
5043     const unsigned int shape0[] = { 1, 1, 2, 2 };
5044     const unsigned int shape1[] = { 1, 1, 2, 2 };
5045
5046     std::vector<float> input0({ 1,  2, 3, 4 });
5047     std::vector<float> input1({ 1, -1, 0, 2 });
5048     std::vector<float> output({ 0,  3, 3, 2 });
5049
5050     return SubtractionTestHelper(workloadFactory,
5051                                  shape0, input0, 1.0f, 0,
5052                                  shape1, input1, 1.0f, 0,
5053                                  shape0, output, 1.0f, 0);
5054 }
5055
5056 LayerTestResult<float, 4> SubtractionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
5057 {
5058     const unsigned int shape0[] = { 1, 1, 2, 2 };
5059     const unsigned int shape1[] = { 1, 1, 1, 1 };
5060
5061     std::vector<float> input0({ 1,  2, 3, 4 });
5062     std::vector<float> input1({ 10 });
5063     std::vector<float> output({ -9,  -8, -7, -6 });
5064
5065     return SubtractionTestHelper(workloadFactory,
5066                                  shape0, input0, 1.0f, 0,
5067                                  shape1, input1, 1.0f, 0,
5068                                  shape0, output, 1.0f, 0);
5069 }
5070
5071 LayerTestResult<float, 4> SubtractionBroadcastTest(armnn::IWorkloadFactory& workloadFactory)
5072 {
5073     const unsigned int shape0[] = { 1, 1, 2, 2 };
5074     const unsigned int shape1[] = { 1, 1, 1, 2 };
5075
5076     std::vector<float> input0({ 1,  2, 3, 4 });
5077     std::vector<float> input1({ 10, -5 });
5078     std::vector<float> output({ -9,  7, -7, 9 });
5079
5080     return SubtractionTestHelper(workloadFactory,
5081                                  shape0, input0, 1.0f, 0,
5082                                  shape1, input1, 1.0f, 0,
5083                                  shape0, output, 1.0f, 0);
5084 }
5085
5086 LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory)
5087 {
5088     constexpr unsigned int inputWidth = 4;
5089     constexpr unsigned int inputHeight = 4;
5090     constexpr unsigned int inputChannels = 1;
5091     constexpr unsigned int inputBatchSize = 1;
5092
5093     constexpr unsigned int outputWidth = inputWidth;
5094     constexpr unsigned int outputHeight = inputHeight;
5095     constexpr unsigned int outputChannels = inputChannels;
5096     constexpr unsigned int outputBatchSize = inputBatchSize;
5097
5098     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
5099         armnn::DataType::QuantisedAsymm8);
5100     inputTensorInfo.SetQuantizationScale(1.5f);
5101     inputTensorInfo.SetQuantizationOffset(-3);
5102
5103     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
5104         armnn::DataType::QuantisedAsymm8);
5105     outputTensorInfo.SetQuantizationScale(1.5f);
5106     outputTensorInfo.SetQuantizationOffset(-3);
5107
5108     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
5109         1, 2, 3, 4,
5110         2, 3, 4, 5,
5111         3, 4, 5, 6,
5112         4, 5, 6, 7
5113     }));
5114
5115     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
5116     result.outputExpected = input;
5117
5118     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
5119     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
5120
5121     armnn::ResizeBilinearQueueDescriptor descriptor;
5122     armnn::WorkloadInfo info;
5123     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
5124     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
5125
5126     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
5127
5128     inputHandle->Allocate();
5129     outputHandle->Allocate();
5130     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
5131
5132     workloadFactory.Finalize();
5133     workload->Execute();
5134
5135     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
5136     return result;
5137 }
5138
5139 LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory)
5140 {
5141     constexpr unsigned int inputWidth = 2;
5142     constexpr unsigned int inputHeight = 2;
5143     constexpr unsigned int inputChannels = 1;
5144     constexpr unsigned int inputBatchSize = 1;
5145
5146     constexpr unsigned int outputWidth = inputWidth / 2;
5147     constexpr unsigned int outputHeight = inputHeight / 2;
5148     constexpr unsigned int outputChannels = inputChannels;
5149     constexpr unsigned int outputBatchSize = inputBatchSize;
5150
5151     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
5152         armnn::DataType::QuantisedAsymm8);
5153     inputTensorInfo.SetQuantizationScale(0.1567f);
5154     inputTensorInfo.SetQuantizationOffset(1);
5155
5156     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
5157         armnn::DataType::QuantisedAsymm8);
5158     outputTensorInfo.SetQuantizationScale(0.1567f);
5159     outputTensorInfo.SetQuantizationOffset(1);
5160
5161     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
5162         1, 255,
5163         200, 250
5164     }));
5165
5166     // The 'resize bilinear' operation projects the top-left corner of output texels into the input image,
5167     // then figures out the interpolants and weights. Note this is different to projecting the centre of the
5168     // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value
5169     // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting
5170     // the centre).
5171     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
5172     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
5173         1
5174     }));
5175
5176     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
5177     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
5178
5179     armnn::ResizeBilinearQueueDescriptor descriptor;
5180     armnn::WorkloadInfo info;
5181     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
5182     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
5183
5184     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
5185
5186     inputHandle->Allocate();
5187     outputHandle->Allocate();
5188     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
5189
5190     workloadFactory.Finalize();
5191     workload->Execute();
5192
5193     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
5194     return result;
5195 }
5196
5197 LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory)
5198 {
5199     constexpr unsigned int inputWidth = 4;
5200     constexpr unsigned int inputHeight = 4;
5201     constexpr unsigned int inputChannels = 1;
5202     constexpr unsigned int inputBatchSize = 1;
5203
5204     constexpr unsigned int outputWidth = inputWidth / 2;
5205     constexpr unsigned int outputHeight = inputHeight / 2;
5206     constexpr unsigned int outputChannels = inputChannels;
5207     constexpr unsigned int outputBatchSize = inputBatchSize;
5208
5209     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
5210         armnn::DataType::QuantisedAsymm8);
5211     inputTensorInfo.SetQuantizationScale(3.141592f);
5212     inputTensorInfo.SetQuantizationOffset(3);
5213
5214     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
5215         armnn::DataType::QuantisedAsymm8);
5216     outputTensorInfo.SetQuantizationScale(3.141592f);
5217     outputTensorInfo.SetQuantizationOffset(3);
5218
5219     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
5220         1, 2, 3, 4,
5221         2, 3, 4, 5,
5222         3, 4, 5, 6,
5223         4, 5, 6, 7
5224     }));
5225
5226     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
5227     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
5228         1, 3,
5229         3, 5
5230     }));
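    // With a 2x halving and top-left projection, every output texel lands exactly on an input
    // texel ((0,0), (0,2), (2,0) and (2,2)), so no interpolation takes place and the quantized
    // values 1, 3, 3 and 5 are copied through unchanged.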
5231
5232     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
5233     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
5234
5235     armnn::ResizeBilinearQueueDescriptor descriptor;
5236     armnn::WorkloadInfo info;
5237     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
5238     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
5239
5240     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
5241
5242     inputHandle->Allocate();
5243     outputHandle->Allocate();
5244     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
5245
5246     workloadFactory.Finalize();
5247     workload->Execute();
5248
5249     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
5250     return result;
5251 }
5252
5253 LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory)
5254 {
5255     constexpr unsigned int inputWidth = 3;
5256     constexpr unsigned int inputHeight = 2;
5257     constexpr unsigned int inputChannels = 1;
5258     constexpr unsigned int inputBatchSize = 1;
5259
5260     constexpr unsigned int outputWidth = 2;
5261     constexpr unsigned int outputHeight = 1;
5262     constexpr unsigned int outputChannels = inputChannels;
5263     constexpr unsigned int outputBatchSize = inputBatchSize;
5264
5265     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
5266         armnn::DataType::QuantisedAsymm8);
5267     inputTensorInfo.SetQuantizationScale(1.5f);
5268     inputTensorInfo.SetQuantizationOffset(-1);
5269
5270     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
5271         armnn::DataType::QuantisedAsymm8);
5272     outputTensorInfo.SetQuantizationScale(1.5f);
5273     outputTensorInfo.SetQuantizationOffset(-1);
5274
5275     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
5276         1,  2,  3, // 3.0, 4.5, 6.0
5277         5,  8, 13  // 9.0, 13.5, 21.0
5278     }));
5279
5280     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
5281     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
5282         1, 3 // 3.0, 5.25
5283     }));
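    // The second output texel projects to input x = 1 * (3.0f / 2.0f) = 1.5, midway between the
    // dequantized values 4.5 and 6.0, giving 5.25; requantizing yields
    // round(5.25f / 1.5f) + (-1) = 3, as expected above.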
5284
5285     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
5286     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
5287
5288     armnn::ResizeBilinearQueueDescriptor descriptor;
5289     armnn::WorkloadInfo info;
5290     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
5291     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
5292
5293     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
5294
5295     inputHandle->Allocate();
5296     outputHandle->Allocate();
5297
5298     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
5299
5300     workloadFactory.Finalize();
5301     workload->Execute();
5302
5303     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
5304     return result;
5305 }
5306
5307 LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory)
5308 {
5309     constexpr unsigned int inputWidth = 2;
5310     constexpr unsigned int inputHeight = 3;
5311     constexpr unsigned int inputChannels = 1;
5312     constexpr unsigned int inputBatchSize = 1;
5313
5314     constexpr unsigned int outputWidth = 5;
5315     constexpr unsigned int outputHeight = 3;
5316     constexpr unsigned int outputChannels = inputChannels;
5317     constexpr unsigned int outputBatchSize = inputBatchSize;
5318
5319     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
5320         armnn::DataType::QuantisedAsymm8);
5321     inputTensorInfo.SetQuantizationScale(0.010765f);
5322     inputTensorInfo.SetQuantizationOffset(7);
5323
5324     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
5325         armnn::DataType::QuantisedAsymm8);
5326     outputTensorInfo.SetQuantizationScale(0.010132f);
5327     outputTensorInfo.SetQuantizationOffset(-18);
5328
5329     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
5330          24, 228, // 0.183005, 2.379065,
5331         105, 128, // 1.05497, 1.302565
5332         230,  71  // 2.400595, 0.68896
5333     }));
5334
5335     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
5336     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
5337           0,  87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504
5338          86,  96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498
5339         219, 151,  84,  50,  50  // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002
5340     }));
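    // Worked example for row 0, column 1: the output texel projects to input
    // x = 1 * (2.0f / 5.0f) = 0.4, so the interpolated value is
    // 0.183005f + 0.4f * (2.379065f - 0.183005f) = 1.061429f; requantizing gives
    // round(1.061429f / 0.010132f) + (-18) = 87, matching the expected output above.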
5341
5342     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
5343     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
5344
5345     armnn::ResizeBilinearQueueDescriptor descriptor;
5346     armnn::WorkloadInfo info;
5347     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
5348     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
5349
5350     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
5351
5352     inputHandle->Allocate();
5353     outputHandle->Allocate();
5354     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
5355
5356     workloadFactory.Finalize();
5357     workload->Execute();
5358
5359     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
5360     return result;
5361 }
5362
5363 LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory)
5364 {
5365     // BatchSize: 1
5366     // Channels: 2
5367     // Height: 3
5368     // Width: 2
5369
5370     const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
5371     std::vector<float> inputValues
5372     {
5373         // Batch 0, Channel 0, Height (3) x Width (2)
5374          1.f, 4.f,
5375          4.f, 2.f,
5376          1.f, 6.f,
5377
5378         // Batch 0, Channel 1, Height (3) x Width (2)
5379          1.f, 1.f,
5380          4.f, 1.f,
5381         -2.f, 4.f
5382     };
5383     std::vector<float> expectedOutputValues
5384     {
5385         // Batch 0, Channel 0, Height (3) x Width (2)
5386         1.f, 4.f,
5387         4.f, 2.f,
5388         1.f, 6.f,
5389
5390         // Batch 0, Channel 1, Height (3) x Width (2)
5391         3.f, 3.f,
5392         4.f, 3.f,
5393         2.f, 4.f
5394     };
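    // Channel 0 is passed through unchanged, while every channel 1 value is mapped through the
    // affine transform y = x / 3 + 8 / 3 (for example 1 -> 3, 4 -> 4, -2 -> 2); the mean,
    // variance, beta and gamma that produce this transform are fixed inside BatchNormTestImpl.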
5395
5396     return BatchNormTestImpl<float>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
5397                                     0.f, 0, armnn::DataLayout::NCHW);
5398 }
5399
5400 LayerTestResult<float, 4> BatchNormNhwcTest(armnn::IWorkloadFactory& workloadFactory)
5401 {
5402     // BatchSize: 1
5403     // Height: 3
5404     // Width: 2
5405     // Channels: 2
5406
5407     const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
5408     std::vector<float> inputValues
5409     {
5410         // Batch 0, Height 0, Width (2) x Channel (2)
5411         1.f,  1.f,
5412         4.f,  1.f,
5413
5414         // Batch 0, Height 1, Width (2) x Channel (2)
5415         4.f,  4.f,
5416         2.f,  1.f,
5417
5418         // Batch 0, Height 2, Width (2) x Channel (2)
5419         1.f, -2.f,
5420         6.f,  4.f
5421     };
5422     std::vector<float> expectedOutputValues
5423     {
5424         // Batch 0, Height 0, Width (2) x Channel (2)
5425         1.f, 3.f,
5426         4.f, 3.f,
5427
5428         // Batch 0, Height 1, Width (2) x Channel (2)
5429         4.f, 4.f,
5430         2.f, 3.f,
5431
5432         // Batch 0, Height 2, Width (2) x Channel (2)
5433         1.f, 2.f,
5434         6.f, 4.f
5435     };
5436
5437     return BatchNormTestImpl<float>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
5438                                     0.f, 0, armnn::DataLayout::NHWC);
5439 }
5440
5441 LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory)
5442 {
5443     // BatchSize: 1
5444     // Channels: 2
5445     // Height: 3
5446     // Width: 2
5447
5448     const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
5449     std::vector<float> inputValues
5450     {
5451         // Batch 0, Channel 0, Height (3) x Width (2)
5452          1.f, 4.f,
5453          4.f, 2.f,
5454          1.f, 6.f,
5455
5456         // Batch 0, Channel 1, Height (3) x Width (2)
5457          1.f, 1.f,
5458          4.f, 1.f,
5459         -2.f, 4.f
5460     };
5461     std::vector<float> expectedOutputValues
5462     {
5463         // Batch 0, Channel 0, Height (3) x Width (2)
5464         1.f, 4.f,
5465         4.f, 2.f,
5466         1.f, 6.f,
5467
5468         // Batch 0, Channel 1, Height (3) x Width (2)
5469         3.f, 3.f,
5470         4.f, 3.f,
5471         2.f, 4.f
5472     };
5473
5474     return BatchNormTestImpl<uint8_t>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
5475                                       1.f/20.f, 50, armnn::DataLayout::NCHW);
5476 }
5477
5478 LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(armnn::IWorkloadFactory& workloadFactory)
5479 {
5480     // BatchSize: 1
5481     // Height: 3
5482     // Width: 2
5483     // Channels: 2
5484
5485     const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
5486     std::vector<float> inputValues
5487     {
5488         // Batch 0, Height 0, Width (2) x Channel (2)
5489         1.f,  1.f,
5490         4.f,  1.f,
5491
5492         // Batch 0, Height 1, Width (2) x Channel (2)
5493         4.f,  4.f,
5494         2.f,  1.f,
5495
5496         // Batch 0, Height 2, Width (2) x Channel (2)
5497         1.f, -2.f,
5498         6.f,  4.f
5499     };
5500     std::vector<float> expectedOutputValues
5501     {
5502         // Batch 0, Height 0, Width (2) x Channel (2)
5503         1.f, 3.f,
5504         4.f, 3.f,
5505
5506         // Batch 0, Height 1, Width (2) x Channel (2)
5507         4.f, 4.f,
5508         2.f, 3.f,
5509
5510         // Batch 0, Height 2, Width (2) x Channel (2)
5511         1.f, 2.f,
5512         6.f, 4.f
5513     };
5514
5515     return BatchNormTestImpl<uint8_t>(workloadFactory, inputOutputShape, inputValues, expectedOutputValues,
5516                                       1.f/20.f, 50, armnn::DataLayout::NHWC);
5517 }
5518
5519 LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory)
5520 {
5521     return ConstantTestImpl<uint8_t>(workloadFactory, 2e-6f, 1);
5522 }
5523
5524 LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory)
5525 {
5526     return Concatenation1dTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5527 }
5528
5529 LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5530 {
5531     return Concatenation2dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5532 }
5533
5534 LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5535 {
5536     return Concatenation2dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5537 }
5538
5539 LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
5540 {
5541     return Concatenation2dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5542 }
5543
5544 LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
5545 {
5546     return Concatenation2dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5547 }
5548
5549 LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5550 {
5551     return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5552 }
5553
5554 LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5555 {
5556     return Concatenation3dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5557 }
5558
5559 LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5560 {
5561     return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5562 }
5563
5564 LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
5565 {
5566     return Concatenation3dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5567 }
5568
5569 LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
5570 {
5571     return Concatenation3dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5572 }
5573
5574 LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
5575 {
5576     return Concatenation3dDim2DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
5577 }
5578
5579 LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
5580                                                                  bool forceNoPadding)
5581 {
5582     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
5583 }
5584
5585 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory,
5586                                                                         bool forceNoPadding)
5587 {
5588     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<uint8_t>(workloadFactory, forceNoPadding, 3.0f, -5);
5589 }
5590
5591 LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory,
5592                                                                  bool forceNoPadding)
5593 {
5594     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<float>(workloadFactory, forceNoPadding);
5595 }
5596
5597 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory,
5598                                                                         bool forceNoPadding)
5599 {
5600     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<uint8_t>(workloadFactory, forceNoPadding, 0.1f, 128);
5601 }
5602
5603 LayerTestResult<float, 4> SimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory,
5604                                                  const armnn::DataLayoutIndexed& dataLayout)
5605 {
5606     return SimpleMaxPooling2dTestCommon<float>(workloadFactory, dataLayout);
5607 }
5608
5609 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
5610                                                         const armnn::DataLayoutIndexed& dataLayout)
5611 {
5612     return SimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, dataLayout);
5613 }
5614
5615 LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory,
5616                                                      const armnn::DataLayoutIndexed& dataLayout)
5617 {
5618     return SimpleAveragePooling2dTestCommon<float>(workloadFactory, dataLayout);
5619 }
5620
5621 LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
5622                                                             const armnn::DataLayoutIndexed& dataLayout)
5623 {
5624     return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, dataLayout, 0.5f, -1);
5625 }
5626
5627 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
5628                                                                             bool forceNoPadding)
5629 {
5630     return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
5631 }
5632
5633 LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
5634 {
5635     return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory);
5636 }
5637
5638 LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
5639 {
5640     return LargeTensorsAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5f, -1);
5641 }
5642
5643 LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory,
5644                                                 const armnn::DataLayoutIndexed& dataLayout)
5645 {
5646     return SimpleL2Pooling2dTestCommon<float>(workloadFactory, dataLayout);
5647 }
5648
5649 LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
5650                                                        const armnn::DataLayoutIndexed& dataLayout)
5651 {
5652     return SimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory, dataLayout);
5653 }
5654
5655 LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory)
5656 {
5657     return L2Pooling2dSize3Stride1TestCommon<float>(workloadFactory);
5658 }
5659
5660 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5661 {
5662     return L2Pooling2dSize3Stride1TestCommon<uint8_t>(workloadFactory);
5663 }
5664
5665 LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory)
5666 {
5667     return L2Pooling2dSize3Stride3TestCommon<float>(workloadFactory);
5668 }
5669
5670 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5671 {
5672     return L2Pooling2dSize3Stride3TestCommon<uint8_t>(workloadFactory);
5673 }
5674
5675 LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory)
5676 {
5677     return L2Pooling2dSize3Stride4TestCommon<float>(workloadFactory);
5678 }
5679
5680 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5681 {
5682     return L2Pooling2dSize3Stride4TestCommon<uint8_t>(workloadFactory);
5683 }
5684
5685 LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory)
5686 {
5687     return L2Pooling2dSize7TestCommon<float>(workloadFactory);
5688 }
5689
5690 LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5691 {
5692     return L2Pooling2dSize7TestCommon<uint8_t>(workloadFactory);
5693 }
5694
5695 LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory)
5696 {
5697     return L2Pooling2dSize9TestCommon<float>(workloadFactory);
5698 }
5699
5700 LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5701 {
5702     return L2Pooling2dSize9TestCommon<uint8_t>(workloadFactory);
5703 }
5704
5705 LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
5706 {
5707     return AsymmetricNonSquarePooling2dTestCommon<float>(workloadFactory);
5708 }
5709
5710 LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
5711 {
5712     return AsymmetricNonSquarePooling2dTestCommon<uint8_t>(workloadFactory);
5713 }
5714
5715 LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory,
5716                                                armnn::IWorkloadFactory& refWorkloadFactory,
5717                                                armnn::PoolingAlgorithm  poolingType)
5718 {
5719     return ComparePooling2dTestCommon<float>(workloadFactory, refWorkloadFactory, poolingType);
5720 }
5721
5722 LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
5723                                                       armnn::IWorkloadFactory& refWorkloadFactory,
5724                                                       armnn::PoolingAlgorithm  poolingType)
5725 {
5726     return ComparePooling2dTestCommon<uint8_t>(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128);
5727 }
5728
5729 LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory,
5730                                                   bool transposeWeights)
5731 {
5732     return FullyConnectedLargeTestCommon<float>(workloadFactory, transposeWeights);
5733 }
5734
5735 LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory)
5736 {
5737     return IgnorePaddingSimpleMaxPooling2dTestCommon<float>(workloadFactory);
5738 }
5739
5740 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
5741 {
5742     return IgnorePaddingSimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, 1.0f, -5);
5743 }
5744
5745 LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
5746 {
5747     return IgnorePaddingMaxPooling2dSize3TestCommon<float>(workloadFactory);
5748 }
5749
5750 LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5751 {
5752     return IgnorePaddingMaxPooling2dSize3TestCommon<uint8_t>(workloadFactory, 1.0f, -5);
5753 }
5754
5755 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
5756 {
5757     return IgnorePaddingSimpleAveragePooling2dTestCommon<float>(workloadFactory);
5758 }
5759
5760 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
5761 {
5762     return IgnorePaddingSimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory);
5763 }
5764
5765 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory)
5766 {
5767     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<float>(workloadFactory);
5768 }
5769
5770 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
5771     armnn::IWorkloadFactory& workloadFactory)
5772 {
5773     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<uint8_t>(workloadFactory);
5774 }
5775
5776 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
5777 {
5778     return IgnorePaddingAveragePooling2dSize3TestCommon<float>(workloadFactory);
5779 }
5780
5781 LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5782 {
5783     return IgnorePaddingAveragePooling2dSize3TestCommon<uint8_t>(workloadFactory);
5784 }
5785
5786 LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory)
5787 {
5788     return IgnorePaddingSimpleL2Pooling2dTestCommon<float>(workloadFactory);
5789 }
5790
5791 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
5792 {
5793     return IgnorePaddingSimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory);
5794 }
5795
5796 LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
5797 {
5798     return IgnorePaddingL2Pooling2dSize3TestCommon<float>(workloadFactory);
5799 }
5800
5801 LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
5802 {
5803     return IgnorePaddingL2Pooling2dSize3TestCommon<uint8_t>(workloadFactory);
5804 }
5805
5806 LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory)
5807 {
5808     return SimplePermuteFloat32TestCommon(workloadFactory);
5809 }
5810
5811 LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory)
5812 {
5813     return SimplePermuteUint8TestCommon(workloadFactory);
5814 }
5815
5816 LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory)
5817 {
5818     return PermuteFloat32ValueSet1TestCommon(workloadFactory);
5819 }
5820
5821 LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory)
5822 {
5823     return PermuteFloat32ValueSet2TestCommon(workloadFactory);
5824 }
5825
5826 LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory)
5827 {
5828     return PermuteFloat32ValueSet3TestCommon(workloadFactory);
5829 }
5830
5831 namespace
5832 {
5833
5834 template <typename T, std::size_t InputDim, std::size_t OutputDim>
5835 LayerTestResult<T, OutputDim> MeanTestHelper(armnn::IWorkloadFactory& workloadFactory,
5836                                              const unsigned int* inputShape,
5837                                              const std::vector<T>& inputData,
5838                                              const std::vector<unsigned int>& axis,
5839                                              bool keepDims,
5840                                              const unsigned int* outputShape,
5841                                              const std::vector<T>& outputData,
5842                                              float scale = 1.0f,
5843                                              int32_t offset = 0)
5844 {
5845     auto dataType = (std::is_same<T, uint8_t>::value ? armnn::DataType::QuantisedAsymm8 : armnn::DataType::Float32);
5846
5847     armnn::TensorInfo inputTensorInfo(InputDim, inputShape, dataType);
5848     armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, dataType);
5849
5850     inputTensorInfo.SetQuantizationScale(scale);
5851     inputTensorInfo.SetQuantizationOffset(offset);
5852
5853     outputTensorInfo.SetQuantizationScale(scale);
5854     outputTensorInfo.SetQuantizationOffset(offset);
5855
5856     auto input = MakeTensor<T, InputDim>(inputTensorInfo, inputData);
5857
5858     LayerTestResult<T, OutputDim> result(outputTensorInfo);
5859     result.outputExpected = MakeTensor<T, OutputDim>(outputTensorInfo, outputData);
5860
5861     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
5862     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
5863
5864     armnn::MeanQueueDescriptor data;
5865     data.m_Parameters.m_Axis = axis;
5866     data.m_Parameters.m_KeepDims = keepDims;
5867     armnn::WorkloadInfo info;
5868     AddInputToWorkload(data,  info, inputTensorInfo, inputHandle.get());
5869     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
5870
5871     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMean(data, info);
5872
5873     inputHandle->Allocate();
5874     outputHandle->Allocate();
5875
5876     CopyDataToITensorHandle(inputHandle.get(), input.origin());
5877
5878     workloadFactory.Finalize();
5879     workload->Execute();
5880
5881     CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
5882
5883     return result;
5884 }
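
// For reference, the reduction MeanTestHelper drives can be sketched in plain C++. This is a
// hypothetical, illustrative helper (not part of the test suite): it computes the mean over
// all elements, matching the axis = {} / keepDims = false case exercised by the simple tests.
template <typename T>
float NaiveFullMean(const std::vector<T>& values)
{
    float sum = 0.0f;
    for (const T& value : values)
    {
        sum += static_cast<float>(value);
    }
    return values.empty() ? 0.0f : sum / static_cast<float>(values.size());
}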
5885
5886 } // anonymous namespace
5887
5888 LayerTestResult<uint8_t, 1> MeanUint8SimpleTest(armnn::IWorkloadFactory& workloadFactory)
5889 {
5890     const unsigned int inputShape[] = { 3, 2 };
5891     const unsigned int outputShape[] = { 1 };
5892
5893     std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
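    // Mean over all six elements: (1 + 1 + 2 + 2 + 3 + 3) / 6 = 2.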
5894     std::vector<uint8_t> output({ 2 });
5895
5896     return MeanTestHelper<uint8_t, 2, 1>(workloadFactory, inputShape, input, {}, false, outputShape, output);
5897 }
5898
5899 LayerTestResult<uint8_t, 3> MeanUint8SimpleAxisTest(armnn::IWorkloadFactory& workloadFactory)
5900 {
5901     const unsigned int inputShape[] = { 1, 1, 3, 2 };
5902     const unsigned int outputShape[] = { 1, 1, 2 };
5903
5904     std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
5905     std::vector<uint8_t> output({ 2, 2 });
5906
5907     return MeanTestHelper<uint8_t, 4, 3>(workloadFactory, inputShape, input, { 2 }, false, outputShape, output);
5908 }
5909
5910 LayerTestResult<uint8_t, 4> MeanUint8KeepDimsTest(armnn::IWorkloadFactory& workloadFactory)
5911 {
5912     const unsigned int inputShape[] = { 1, 1, 3, 2 };
5913     const unsigned int outputShape[] = { 1, 1, 1, 2 };
5914
5915     std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
5916     std::vector<uint8_t> output({ 2, 2 });
5917
5918     return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, { 2 }, true, outputShape, output);
5919 }
5920
5921 LayerTestResult<uint8_t, 4> MeanUint8MultipleDimsTest(armnn::IWorkloadFactory& workloadFactory)
5922 {
5923     const unsigned int inputShape[] = { 2, 3, 1, 2 };
5924     const unsigned int outputShape[] = { 1, 3, 1, 1 };
5925
5926     std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6 });
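    // Reducing over axes { 0, 3 } gives per-channel means of 1.5, 3.5 and 5.5; the expected
    // uint8 outputs below keep only the integer part.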
5927     std::vector<uint8_t> output({ 1, 3, 5 });
5928
5929     return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, { 0, 3 }, true, outputShape, output);
5930 }
5931
5932 LayerTestResult<uint8_t, 1> MeanVtsUint8Test(armnn::IWorkloadFactory& workloadFactory)
5933 {
5934     const unsigned int inputShape[] = { 4, 3, 2 };
5935     const unsigned int outputShape[] = { 2 };
5936
5937     std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
5938                                  24 });
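    // Input and output share the same quantisation (scale 0.8, offset 5), so the expected
    // quantised means are simply the means of the raw quantised values: 12 and 13.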
5939     std::vector<uint8_t> output({ 12, 13 });
5940
5941     return MeanTestHelper<uint8_t, 3, 1>(workloadFactory, inputShape, input, { 0, 1 }, false, outputShape,
5942                                          output, 0.8f, 5);
5943 }
5944
5945 LayerTestResult<float, 1> MeanFloatSimpleTest(armnn::IWorkloadFactory& workloadFactory)
5946 {
5947     const unsigned int inputShape[] = { 3, 2 };
5948     const unsigned int outputShape[] = { 1 };
5949
5950     std::vector<float> input({ 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f });
5951     std::vector<float> output({ 2.0f });
5952
5953     return MeanTestHelper<float, 2, 1>(workloadFactory, inputShape, input, {}, false, outputShape, output);
5954 }
5955
5956 LayerTestResult<float, 3> MeanFloatSimpleAxisTest(armnn::IWorkloadFactory& workloadFactory)
5957 {
5958     const unsigned int inputShape[] = { 2, 3, 1, 2 };
5959     const unsigned int outputShape[] = { 3, 1, 2 };
5960
5961     std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });
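    // The two batches are identical, so reducing over axis 0 reproduces a single batch.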
5962     std::vector<float> output({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });
5963
5964     return MeanTestHelper<float, 4, 3>(workloadFactory, inputShape, input, { 0 }, false, outputShape, output);
5965 }
5966
5967 LayerTestResult<float, 4> MeanFloatKeepDimsTest(armnn::IWorkloadFactory& workloadFactory)
5968 {
5969     const unsigned int inputShape[] = { 1, 1, 3, 2 };
5970     const unsigned int outputShape[] = { 1, 1, 1, 2 };
5971
5972     std::vector<float> input({ 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f });
5973     std::vector<float> output({ 2.0f, 2.0f });
5974
5975     return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, { 2 }, true, outputShape, output);
5976 }
5977
5978 LayerTestResult<float, 4> MeanFloatMultipleDimsTest(armnn::IWorkloadFactory& workloadFactory)
5979 {
5980     const unsigned int inputShape[] = { 2, 3, 1, 2 };
5981     const unsigned int outputShape[] = { 1, 3, 1, 1 };
5982
5983     std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });
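    // Means over batch and width for each channel:
    // (1+2+1+2)/4 = 1.5, (3+4+3+4)/4 = 3.5, (5+6+5+6)/4 = 5.5.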
5984     std::vector<float> output({ 1.5f, 3.5f, 5.5f });
5985
5986     return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, { 0, 3 }, true, outputShape, output);
5987 }
5988
5989 LayerTestResult<float, 1> MeanVtsFloat1Test(armnn::IWorkloadFactory& workloadFactory)
5990 {
5991     const unsigned int inputShape[] = { 4, 3, 2 };
5992     const unsigned int outputShape[] = { 2 };
5993
5994     std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
5995                                15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
5996     std::vector<float> output({ 12.0f, 13.0f });
5997
5998     return MeanTestHelper<float, 3, 1>(workloadFactory, inputShape, input, { 0, 1 }, false, outputShape, output);
5999 }
6000
6001 LayerTestResult<float, 3> MeanVtsFloat2Test(armnn::IWorkloadFactory& workloadFactory)
6002 {
6003     const unsigned int inputShape[] = { 4, 3, 2 };
6004     const unsigned int outputShape[] = { 1, 3, 1 };
6005
6006     std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
6007                                15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
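    // Reducing over axes { 0, 2 } with keepDims: e.g. for the first middle-dim index,
    // (1+2+7+8+13+14+19+20) / 8 = 10.5.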
6008     std::vector<float> output({ 10.5f, 12.5f, 14.5f });
6009
6010     return MeanTestHelper<float, 3, 3>(workloadFactory, inputShape, input, { 0, 2 }, true, outputShape, output);
6011 }
6012
6013 LayerTestResult<float, 3> MeanVtsFloat3Test(armnn::IWorkloadFactory& workloadFactory)
6014 {
6015     const unsigned int inputShape[] = { 1, 2, 2, 1 };
6016     const unsigned int outputShape[] = { 1, 2, 1 };
6017
6018     std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f });
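    // Reducing over axis 2: (1 + 2) / 2 = 1.5 and (3 + 4) / 2 = 3.5.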
6019     std::vector<float> output({ 1.5f, 3.5f });
6020
6021     return MeanTestHelper<float, 4, 3>(workloadFactory, inputShape, input, { 2 }, false, outputShape, output);
6022 }
6023
6024 LayerTestResult<float, 4> AdditionAfterMaxPoolTest(armnn::IWorkloadFactory& workloadFactory)
6025 {
6026     // Create the initial 3x3 input tensor:
6027     // 1, 2, 3
6028     // 4, 5, 6
6029     // 7, 8, 9
6030
6031     armnn::TensorInfo poolingInputTensorInfo({ 1, 1, 3, 3}, armnn::GetDataType<float>());
6032     armnn::TensorInfo poolingOutputTensorInfo({ 1, 1, 2, 2}, armnn::GetDataType<float>());
6033
6034     boost::multi_array<float, 4> poolingInput = MakeTensor<float,4>(poolingInputTensorInfo,
6035                                                             {1, 2, 3,
6036                                                              4, 5, 6,
6037                                                              7, 8, 9
6038                                                             });
6039
6040     std::unique_ptr<armnn::ITensorHandle> poolingInputHandle =
6041             workloadFactory.CreateTensorHandle(poolingInputTensorInfo);
6042     std::unique_ptr<armnn::ITensorHandle> poolingOutputHandle =
6043             workloadFactory.CreateTensorHandle(poolingOutputTensorInfo);
6044
6045     // Apply MaxPool poolSize = 1x1, stride=2x2
6046     // Result =
6047     // 1, 3
6048     // 7, 9
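    // A 1x1 pool with a 2x2 stride simply subsamples every other element, so max pooling
    // picks out 1, 3, 7 and 9 here.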
6049     armnn::Pooling2dDescriptor descriptor;
6050     descriptor.m_PoolHeight = 1;
6051     descriptor.m_PoolWidth = 1;
6052     descriptor.m_StrideX = 2;
6053     descriptor.m_StrideY = 2;
6054     descriptor.m_PoolType = armnn::PoolingAlgorithm::Max;
6055
6056     armnn::Pooling2dQueueDescriptor queueDescriptor;
6057     queueDescriptor.m_Parameters = descriptor;
6058     armnn::WorkloadInfo workloadInfo;
6059     AddInputToWorkload(queueDescriptor, workloadInfo, poolingInputTensorInfo, poolingInputHandle.get());
6060     AddOutputToWorkload(queueDescriptor, workloadInfo, poolingOutputTensorInfo, poolingOutputHandle.get());
6061
6062     // Create the max pooling workload.
6063     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(queueDescriptor, workloadInfo);
6064
6066     auto shape(GetTensorShapeAsArray<4>(poolingOutputTensorInfo));
6067     boost::multi_array<float, 4> resultMaxPool;
6068     resultMaxPool.resize(shape);
6069
6071     // Create an addition with another tensor of the same size.
6072     // This tensor is what a Conv2d with a 2x2 kernel of ones and stride 1x1
6073     // would produce from the initial tensor:
6074     // 12, 16
6075     // 24, 28
6076
6077     armnn::TensorInfo addInputTensorInfo({ 1,1,2,2}, armnn::GetDataType<float>());
6078     armnn::TensorInfo addOutputTensorInfo({ 1,1,2,2}, armnn::GetDataType<float>());
6079
6080     boost::multi_array<float, 4> addInput = MakeTensor<float,4>(addInputTensorInfo,
6081                                                                     {12, 16,
6082                                                                      24, 28,
6083                                                                     });
6084
6085     // Expected output tensor after MaxPool and Addition.
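    // Element-wise sums: 1+12=13, 3+16=19, 7+24=31, 9+28=37.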
6086     LayerTestResult<float,4> addRet(addOutputTensorInfo);
6087     addRet.outputExpected = MakeTensor<float, 4>(addOutputTensorInfo, std::vector<float>(
6088             {
6089                     13, 19,
6090                     31, 37
6091             }));
6092
6093     std::unique_ptr<armnn::ITensorHandle> addInputHandle = workloadFactory.CreateTensorHandle(addInputTensorInfo);
6094     std::unique_ptr<armnn::ITensorHandle> addOutputHandle = workloadFactory.CreateTensorHandle(addOutputTensorInfo);
6095
6096     armnn::AdditionQueueDescriptor data;
6097     armnn::WorkloadInfo info;
6098
6099     // Add the output of the MaxPool and the new tensor
6100     AddInputToWorkload(data, info, poolingOutputTensorInfo, poolingOutputHandle.get());
6101     AddInputToWorkload(data, info, addInputTensorInfo, addInputHandle.get());
6102     AddOutputToWorkload(data, info, addOutputTensorInfo, addOutputHandle.get());
6103
6104     std::unique_ptr<armnn::IWorkload> addWorkload = workloadFactory.CreateAddition(data, info);
6105
6106     poolingInputHandle->Allocate();
6107     poolingOutputHandle->Allocate();
6108     addInputHandle->Allocate();
6109     addOutputHandle->Allocate();
6110
6111     CopyDataToITensorHandle(poolingInputHandle.get(), &poolingInput[0][0][0][0]);
6112     CopyDataToITensorHandle(addInputHandle.get(), &addInput[0][0][0][0]);
6113
6114     workload->Execute();
6115
6116     // Read the intermediate max-pool result back only after the pooling workload has run;
6117     // its output handle already feeds the addition workload directly, so no copy back is needed.
6118     CopyDataFromITensorHandle(&resultMaxPool[0][0][0][0], poolingOutputHandle.get());

6119
6120     CopyDataFromITensorHandle(&addRet.output[0][0][0][0], addOutputHandle.get());
6121
6122     workloadFactory.Finalize();
6123
6124     return addRet;
6125 }