src/armnn/backends/test/LayerTests.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // See LICENSE file in the project root for full license information.
   4 //
   5 #include "LayerTests.hpp"
   6
   7 #include "test/TensorHelpers.hpp"
   8 #include "TensorCopyUtils.hpp"
   9 #include "Permute.hpp"
  10
  11 #include <boost/test/unit_test.hpp>
  12 #include <boost/assert.hpp>
  13
  14 #include "armnn/LayerSupport.hpp"
  15
  16 #include "backends/CpuTensorHandle.hpp"
  17 #include "backends/WorkloadFactory.hpp"
  18
  19 #ifdef ARMCOMPUTECL_ENABLED
  20 #include "backends/ClTensorHandle.hpp"
  21 #include "backends/ArmComputeTensorUtils.hpp"
  22 #endif
  23
  24 #include <algorithm>
  25 #include <boost/cast.hpp>
  26
  27 #include "WorkloadTestUtils.hpp"
  28 #include "Conv2dTestImpl.hpp"
  29 #include "BatchNormTestImpl.hpp"
  30 #include "ActivationTestImpl.hpp"
  31 #include "Pooling2dTestImpl.hpp"
  32 #include "ReshapeTestImpl.hpp"
  33 #include "FullyConnectedTestImpl.hpp"
  34 #include "SplitterTestImpl.hpp"
  35 #include "SoftmaxTestImpl.hpp"
  36 #include "NormTestImpl.hpp"
  37 #include "PermuteTestImpl.hpp"
  38 #include "LstmTestImpl.hpp"
  39 #include "ConvertFp16ToFp32TestImpl.hpp"
  40 #include "ConvertFp32ToFp16TestImpl.hpp"
  41
  42 // 3-channel 16x8 image used as common input data for a number of Conv2d tests.
  43 static std::vector<float> ConvInput3x8x16({
  44     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  45     0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  46     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  47     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  48     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  49     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  50     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  51     0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
  52     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  53     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  55     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  56     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  57     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  58     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  59     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  60     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  61     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  62     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  63     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  64     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  65     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  66     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  67     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
  68 });
  69
  70 // 2-channel bias used by a number of Conv2d tests.
  71 static std::vector<float> Bias2({0, 2});
  72
  73 // Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled.
  74 template<typename T>
  75 boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset)
  76 {
  77     if(biasEnabled)
  78     {
  79         armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, armnn::GetDataType<T>());
  80         boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2));
  81         return bias;
  82     }
  83     else
  84     {
  85         return boost::multi_array<T, 1>();
  86     }
  87 }
  88
  89 template<typename T>
  90 LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory,
  91                                                        float                    qScale,
  92                                                        int32_t                  qOffset,
  93                                                        bool                     biasEnabled)
  94 {
  95     // Use common single-batch 3-channel 16x8 image.
  96     armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
  97     boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
  98
  99     // Use a 2-element batch with 3-channel 3x5 kernels.
 100     armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType<T>());
 101     boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
 102         QuantizedVector<T>(qScale, qOffset, {
 103             1, 1, 1,
 104             1, -1, 1,
 105             1, 1, 1,
 106             1, 1, 1,
 107             1, 1, 1,
 108
 109             0, 0, 0,
 110             0, 0, 0,
 111             0, 0, 0,
 112             0, 0, 0,
 113             0, 0, 0,
 114
 115             2, 2, 2,
 116             2, 2, 2,
 117             2, 2, 2,
 118             2, 2, 2,
 119             2, 2, 2,
 120
 121
 122             0, 0, 0,
 123             0, 0, 0,
 124             0, 0, 0,
 125             0, 0, 0,
 126             0, 0, 0,
 127
 128             1, 1, 1,
 129             1, 1, 1,
 130             1, 1, 1,
 131             1, 1, 1,
 132             1, 1, 1,
 133
 134             0, 0, 0,
 135             0, 0, 0,
 136             0, 0, 0,
 137             0, 0, 0,
 138             0, 0, 0
 139         })));
 140
 141     // Expected output is 2 batch elements of a 1-channel 14x4 image.
 142     armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType<T>());
 143     boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
 144         QuantizedVector<T>(qScale, qOffset, {
 145             -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
 146             -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
 147             -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
 148             -23.5f, -23.5f, -23.5f,
 149             -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
 150             -23.5f, -23.5f, -23.5f,
 151
 152             5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 153             5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 154             5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 155             5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 156         })));
 157
 158     return SimpleConvolution2dTestImpl<T>(workloadFactory,
 159       input,
 160       kernel,
 161       GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
 162       expectedOutput,
 163       qScale,
 164       qOffset);
 165 }
 166
 167 template<typename T>
 168 LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory,
 169                                                        float                    qScale,
 170                                                        int32_t                  qOffset,
 171                                                        bool                     biasEnabled)
 172 {
 173     // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
 174
 175     // Use common single-batch 3-channel 16x8 image.
 176     armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
 177     boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
 178
 179     // Use a 2-element batch of 3-channel 3x3 kernels.
 180     armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType<T>());
 181     boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
 182         QuantizedVector<T>(qScale, qOffset, {
 183             1, 1, 1,
 184             1, -1, 1,
 185             1, 1, 1,
 186
 187             0, 0, 0,
 188             0, 0, 0,
 189             0, 0, 0,
 190
 191             2, 2, 2,
 192             2, 2, 2,
 193             2, 2, 2,
 194
 195
 196             0, 0, 0,
 197             0, 0, 0,
 198             0, 0, 0,
 199
 200             1, 1, 1,
 201             1, 1, 1,
 202             1, 1, 1,
 203
 204             0, 0, 0,
 205             0, 0, 0,
 206             0, 0, 0
 207         })));
 208
 209     // Expected output is 1 batch of a 2-channel 14x6 image.
 210     armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType<T>());
 211     boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
 212         QuantizedVector<T>(qScale, qOffset, {
 213             -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
 214             -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
 215             -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
 216             -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
 217             -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
 218             -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
 219
 220             3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 221             3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 222             3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 223             3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 224             3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 225             3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 226         })));
 227
 228     return SimpleConvolution2dTestImpl<T>(workloadFactory,
 229       input,
 230       kernel,
 231       GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
 232       expectedOutput,
 233       qScale,
 234       qOffset);
 235 }
 236
 237 LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory,
 238                                                      bool                     biasEnabled)
 239 {
 240     return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled);
 241 }
 242
 243 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
 244                                                          bool                     biasEnabled)
 245 {
 246     return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled);
 247 }
 248
 249 LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory,
 250                                                      bool                     biasEnabled)
 251 {
 252     return SimpleConvolution2d3x3TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled);
 253 }
 254
 255 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory,
 256                                                             bool                     biasEnabled)
 257 {
 258     return SimpleConvolution2d3x3TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled);
 259 }
 260
 261 template<typename T>
 262 LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
 263     armnn::IWorkloadFactory& workloadFactory,
 264     float                    qScale,
 265     int32_t                  qOffset)
 266 {
 267     // Use a single-batch 1-channel 3x3 image as input.
 268     armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType<T>());
 269     boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
 270         QuantizedVector<T>(qScale, qOffset, {
 271             11,21,31,
 272             12,22,32,
 273             13,23,33
 274         })));
 275
 276     // Use 1 batch of a 1-channel 2x2 kernel.
 277     armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType<T>());
 278     boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
 279         QuantizedVector<T>(qScale, qOffset, {
 280             -11,-21,
 281             -12,-22,
 282         })));
 283
 284 // Expected output is 1 batch of a 1-channel 6x8 image.
 285 // Manually calculated like this:
 286 //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
 287 //[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
 288 //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
 289 //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
 290 //[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
 291 //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
 292 //[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
 293     armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType<T>());
 294     boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
 295         QuantizedVector<T>(qScale, qOffset, {
 296                0,    0,      0,    0,    0,    0,
 297             -242,  -594,  -934, -372,    0,    0,
 298             -495, -1190, -1850, -725,    0,    0,
 299             -538, -1256, -1916, -748,    0,    0,
 300             -273, -626,  -946,  -363,    0,    0,
 301                0,    0,     0,     0,    0,    0,
 302                0,    0,     0,     0,    0,    0,
 303                0,    0,     0,     0,    0,    0
 304         })));
 305
 306     return SimpleConvolution2dTestImpl<T>(workloadFactory,
 307       input,
 308       kernel,
 309       GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset),
 310       expectedOutput,
 311       qScale,
 312       qOffset,
 313       1,  // Padding left.
 314       2,  // Padding top.
 315       3,  // Padding right.
 316       4); // Padding bottom.
 317 }
 318
 319 template<typename T>
 320 LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory,
 321     float                    qScale,
 322     int32_t                  qOffset)
 323 {
 324     // Use a single-batch 1-channel 5x5 image as input.
 325     armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>());
 326     boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
 327         QuantizedVector<T>(qScale, qOffset, {
 328             11,21,31,41,51,
 329             12,22,32,42,52,
 330             13,23,33,43,53,
 331             14,24,34,44,54,
 332             15,25,35,45,55,
 333         })));
 334
 335     // Use 1 batch of a 1-channel 4x4 kernel.
 336     armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType<T>());
 337     boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
 338         QuantizedVector<T>(qScale, qOffset, {
 339             -11,-21,-31,-41,
 340             -12,-22,-32,-42,
 341             -13,-23,-33,-43,
 342             -14,-24,-34,-44,
 343         })));
 344
 345     // Expected output is 1 batch of a 1-channel 5x5 image.
 346     armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>());
 347     std::vector<T> myVec(outputDesc.GetNumElements(), 0);
 348     boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
 349         QuantizedVector<T>(qScale, qOffset, {
 350             -7140, -10580, -13940,  -9300, -5230,
 351             -9590, -14120, -18520, -12290, -6860,
 352             -9980, -14560, -18960, -12560, -7000,
 353             -7518, -10904, -14144,  -9318, -5152,
 354             -5032,  -7256,  -9376,  -6142, -3368,
 355         })));
 356
 357     return SimpleConvolution2dTestImpl<T>(workloadFactory,
 358         input,
 359         kernel,
 360         GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset),
 361         expectedOutput,
 362         qScale,
 363         qOffset,
 364         1,  // Padding left.
 365         1,  // Padding top.
 366         2,  // Padding right.
 367         2); // Padding bottom.
 368 }
 369
 370 template<typename T>
 371 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory,
 372                                                                  float qScale,
 373                                                                  int32_t qOffset,
 374                                                                  bool biasEnabled)
 375 {
 376     // Use a single-batch 2-channel 5x5 image as input.
 377     armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>());
 378     auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
 379         QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
 380              0,  1,  2,  3,  4,
 381              5,  6,  7,  8,  9,
 382             10, 11, 12, 13, 14,
 383             15, 16, 17, 18, 19,
 384             20, 21, 22, 23, 24,
 385
 386             25, 26, 27, 28, 29,
 387             30, 31, 32, 33, 34,
 388             35, 36, 37, 38, 39,
 389             40, 41, 42, 43, 44,
 390             45, 46, 47, 48, 49
 391         })));
 392
 393     // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
 394     armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>());
 395     auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
 396         QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
 397             32, 31, 30, 29,
 398             28, 27, 26, 25,
 399             24, 23, 22, 21,
 400             20, 19, 18, 17,
 401
 402             16, 15, 14, 13,
 403             12, 11, 10,  9,
 404              8,  7,  6,  5,
 405              4,  3,  2,  1
 406         })));
 407
 408     // Expected output is 1 batch of a 2-channel 5x5 image.
 409     // Calculated using the python tensorflow library with strideX=1, strideY=1.
 410     armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>());
 411     boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
 412         QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
 413             1062, 1580, 1850, 1530, 1117,
 414             2140, 3108, 3500, 2842, 2042,
 415             3580, 5068, 5460, 4342, 3062,
 416             3618, 5072, 5390, 4248, 2971,
 417             3074, 4282, 4510, 3533, 2457,
 418             1550, 2284, 2362, 1955, 1428,
 419             2910, 4206, 4342, 3528, 2536,
 420             3390, 4886, 5022, 4068, 2916,
 421             3566, 5056, 5182, 4133, 2922,
 422             3100, 4352, 4452, 3517, 2465
 423         })));
 424
 425     return DepthwiseConvolution2dAsymmetricTestImpl<T>(workloadFactory,
 426         input,
 427         kernel,
 428         GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
 429         expectedOutput,
 430         qScale,
 431         qOffset,
 432         1,  // Padding left.
 433         1,  // Padding top.
 434         2,  // Padding right.
 435         2,  // Padding bottom.
 436         1,  // strideX
 437         1); // strideY
 438 }
 439
 440 LayerTestResult<float, 4>
 441 Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory)
 442 {
 443     return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon<float>(workloadFactory, 0.0f, 0);
 444 }
 445
 446 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory)
 447 {
 448     return SimpleConvolution2dAsymmetricPaddingTestCommon<float>(workloadFactory, 0.0f, 0);
 449 }
 450
 451 LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
 452                                                      bool                     biasEnabled)
 453 {
 454     return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
 455 }
 456
 457 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
 458                                                               bool biasEnabled)
 459 {
 460     return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
 461 }
 462
 463 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory,
 464                                                                bool                     biasEnabled)
 465 {
 466     return DepthwiseConvolution2dAsymmetricTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled);
 467 }
 468
 469 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
 470                                                             bool                     biasEnabled)
 471 {
 472     return DepthwiseConvolution2dTestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled);
 473 }
 474
 475 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory,
 476                                                                      bool biasEnabled)
 477 {
 478     return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled);
 479 }
 480
 481 LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
 482 {
 483     return Convolution1dTestImpl<float>(workloadFactory, 0.0f, 0, biasEnabled);
 484 }
 485
 486 LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
 487 {
 488     return Convolution1dTestImpl<uint8_t>(workloadFactory, 0.1f, 128, biasEnabled);
 489 }
 490
 491 LayerTestResult<float,4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
 492                                                 armnn::IWorkloadFactory& refWorkloadFactory)
 493 {
 494     return CompareConvolution2dTestImpl<float>(workloadFactory, refWorkloadFactory);
 495 }
 496
 497 template<typename T>
 498 LayerTestResult<T,4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
 499     armnn::IWorkloadFactory& refWorkloadFactory)
 500 {
 501     return CompareDepthwiseConvolution2dTestImpl<T>(workloadFactory, refWorkloadFactory);
 502 }
 503
 504 template LayerTestResult<float, 4> CompareDepthwiseConvolution2dTest<float>(
 505     armnn::IWorkloadFactory&, armnn::IWorkloadFactory&);
 506 template LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dTest<uint8_t>(
 507     armnn::IWorkloadFactory&, armnn::IWorkloadFactory&);
 508
 509 LayerTestResult<float,4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory)
 510 {
 511     auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
 512     auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
 513     return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
 514 }
 515
 516 LayerTestResult<float,4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory)
 517 {
 518     auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
 519     auto normChannel = armnn::NormalizationAlgorithmChannel::Within;
 520     return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
 521 }
 522
 523 LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta)
 524 {
 525     return SimpleSoftmaxTestImpl<float>(workloadFactory, beta);
 526 }
 527
 528 LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta)
 529 {
 530     return SimpleSoftmaxTestImpl<uint8_t>(workloadFactory, beta);
 531 }
 532
 533 LayerTestResult<float,4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory,
 534                                                   armnn::IWorkloadFactory& refWorkloadFactory,
 535                                                   armnn::NormalizationAlgorithmChannel normChannel,
 536                                                   armnn::NormalizationAlgorithmMethod normMethod)
 537 {
 538     return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod);
 539 }
 540
 541 LayerTestResult<float,2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory,
 542     armnn::IWorkloadFactory& refWorkloadFactory,
 543     float beta)
 544 {
 545     return CompareSoftmaxTestImpl<float>(workloadFactory, refWorkloadFactory, beta);
 546 }
 547
 548 LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory,
 549     armnn::IWorkloadFactory& refWorkloadFactory,
 550     float beta)
 551 {
 552     return CompareSoftmaxTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, beta);
 553 }
 554
 555 std::vector<LayerTestResult<float,3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory)
 556 {
 557     return SplitterTestCommon<float>(workloadFactory);
 558 }
 559
 560 std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory)
 561 {
 562     return SplitterTestCommon<uint8_t>(workloadFactory, 1.0f, 0);
 563 }
 564
 565 LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory)
 566 {
 567     return CopyViaSplitterTestImpl<float>(workloadFactory, 0.0f, 0);
 568 }
 569
 570 LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory)
 571 {
 572     return CopyViaSplitterTestImpl<uint8_t>(workloadFactory, 1.0f, 0);
 573 }
 574
 575 LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
 576         armnn::IWorkloadFactory& workloadFactory)
 577 {
 578     armnn::TensorInfo inputDesc({ 2, 2 }, armnn::GetDataType<float>());
 579     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
 580             { 2., 3., 3., 4. }));
 581
 582     armnn::TensorInfo outputDesc({ 2, 4 }, armnn::GetDataType<float>());
 583     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
 584             {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
 585              -0.42734814f, -0.00478661f,  0.13455015f, -0.03560682f}));
 586     return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput);
 587 }
 588
 589 LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
 590         armnn::IWorkloadFactory& workloadFactory)
 591 {
 592     armnn::TensorInfo inputDesc({ 2, 5 }, armnn::GetDataType<float>());
 593     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
 594             {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
 595              0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}));
 596
 597     armnn::TensorInfo outputDesc({ 2, 16 }, armnn::GetDataType<float>());
 598     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
 599             {-0.00396806f, 0.029352f,     -0.00279226f, 0.0159977f,   -0.00835576f,
 600              -0.0211779f,  0.0283512f,    -0.0114597f,  0.00907307f,  -0.0244004f,
 601              -0.0152191f,  -0.0259063f,   0.00914318f,  0.00415118f,  0.017147f,
 602              0.0134203f, -0.013869f,    0.0287268f,   -0.00334693f, 0.00733398f,  -0.0287926f,
 603              -0.0186926f,   0.0193662f,   -0.0115437f,  0.00422612f,  -0.0345232f,
 604              0.00223253f,   -0.00957321f, 0.0210624f,   0.013331f,    0.0150954f,
 605              0.02168f}));
 606     return LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(workloadFactory, input, expectedOutput);
 607 }
 608
 609 LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory)
 610 {
 611     armnn::TensorInfo inputDesc({2, 2}, armnn::GetDataType<float>());
 612     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
 613             {2., 3., 3., 4.}));
 614
 615
 616     armnn::TensorInfo outputDesc({2, 4}, armnn::GetDataType<float>());
 617     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
 618             {{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
 619               -0.0185422f,   0.11281417f,  0.24466537f, -0.1826292f}}));
 620
 621     return LstmNoCifgNoPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput);
 622 }
 623
 624 LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory)
 625 {
 626     unsigned int outputWidth = 3;
 627     unsigned int outputHeight = 6;
 628     unsigned int outputChannels = 3;
 629
 630     unsigned int inputWidth1 = 3;
 631     unsigned int inputHeight1 = 6;
 632     unsigned int inputChannels1 = 2;
 633
 634     unsigned int inputWidth2 = 3;
 635     unsigned int inputHeight2 = 6;
 636     unsigned int inputChannels2 = 1;
 637
 638     // Define the tensor descriptors.
 639     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
 640     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
 641     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);
 642
 643     LayerTestResult<float,3> ret(outputTensorInfo);
 644
 645     ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
 646     {
 647             1.0f, 2.0f, 3.0f,
 648             4.0f, 5.0f, 6.0f,
 649             7.0f, 8.0f, 9.0f,
 650             10.0f, 11.0f, 12.0f,
 651             13.0f, 14.0f, 15.0f,
 652             16.0f, 17.0f, 18.0f,
 653
 654             19.0f, 20.0f, 21.0f,
 655             22.0f, 23.0f, 24.0f,
 656             25.0f, 26.0f, 27.0f,
 657             28.0f, 29.0f, 30.0f,
 658             31.0f, 32.0f, 33.0f,
 659             34.0f, 35.0f, 36.0f,
 660
 661             37.0f, 38.0f, 39.0f,
 662             40.0f, 41.0f, 42.0f,
 663             43.0f, 44.0f, 45.0f,
 664             46.0f, 47.0f, 48.0f,
 665             49.0f, 50.0f, 51.0f,
 666             52.0f, 53.0f, 54.0f,
 667         })
 668     );
 669
 670     auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
 671         {
 672             1.0f, 2.0f, 3.0f,
 673             4.0f, 5.0f, 6.0f,
 674             7.0f, 8.0f, 9.0f,
 675             10.0f, 11.0f, 12.0f,
 676             13.0f, 14.0f, 15.0f,
 677             16.0f, 17.0f, 18.0f,
 678
 679             19.0f, 20.0f, 21.0f,
 680             22.0f, 23.0f, 24.0f,
 681             25.0f, 26.0f, 27.0f,
 682             28.0f, 29.0f, 30.0f,
 683             31.0f, 32.0f, 33.0f,
 684             34.0f, 35.0f, 36.0f,
 685         })
 686     );
 687
 688     auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
 689         {
 690             37.0f, 38.0f, 39.0f,
 691             40.0f, 41.0f, 42.0f,
 692             43.0f, 44.0f, 45.0f,
 693             46.0f, 47.0f, 48.0f,
 694             49.0f, 50.0f, 51.0f,
 695             52.0f, 53.0f, 54.0f,
 696         })
 697     );
 698
 699     std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0].
 700     armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);
 701
 702     std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1].
 703     armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
 704
 705     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 706
 707     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
 708
 709     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
 710         subTensorsSupported ?
 711             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
 712             workloadFactory.CreateTensorHandle(inputTensorInfo1);
 713
 714     std::unique_ptr<armnn::ITensorHandle> inputHandle2  =
 715         subTensorsSupported ?
 716             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
 717             workloadFactory.CreateTensorHandle(inputTensorInfo2);
 718
 719     armnn::MergerQueueDescriptor data;
 720     armnn::WorkloadInfo info;
 721     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
 722     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
 723     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 724
 725     data.m_ViewOrigins.push_back(window1);
 726     data.m_ViewOrigins.push_back(window2);
 727
 728     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);
 729
 730     inputHandle1->Allocate();
 731     inputHandle2->Allocate();
 732     outputHandle->Allocate();
 733
 734     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
 735     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
 736
 737     workloadFactory.Finalize();
 738     workload->Execute();
 739
 740     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
 741
 742     return ret;
 743 }
 744
 745 LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory)
 746 {
 747     unsigned int batchSize = 2;
 748     unsigned int channels  = 2;
 749     unsigned int height    = 2;
 750     unsigned int width     = 3;
 751
 752     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
 753     armnn::TensorInfo outputTensorInfo;
 754
 755     unsigned int shape[] = {batchSize, channels, height, width};
 756
 757     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
 758     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
 759     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
 760
 761
 762     auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>(
 763         {
 764             0.0f, 2.0f, 1.0f,
 765             0.2f, 1.0f, 2.0f,
 766
 767             1.0f, 2.0f, 1.0f,
 768             0.2f, 1.0f, 2.0f,
 769
 770             0.0f, 2.0f, 1.0f,
 771             4.2f, 1.0f, 2.0f,
 772
 773             0.0f, 0.0f, 1.0f,
 774             0.2f, 1.0f, 2.0f,
 775         }));
 776
 777     auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>(
 778         {
 779             1.0f, 2.0f, 1.0f,
 780             0.0f, 1.0f, 2.0f,
 781
 782             1.0f, 2.0f, -2.0f,
 783             0.2f, 1.0f, 2.0f,
 784
 785             0.0f, 2.0f, 1.0f,
 786             4.2f, 0.0f, -3.0f,
 787
 788             0.0f, 0.0f, 1.0f,
 789             0.7f, 1.0f, 5.0f,
 790         }));
 791
 792     LayerTestResult<float,4> ret(outputTensorInfo);
 793     ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>(
 794         {
 795             1.0f, 4.0f, 2.0f,
 796             0.2f, 2.0f, 4.0f,
 797
 798             2.0f, 4.0f, -1.0f,
 799             0.4f, 2.0f, 4.0f,
 800
 801             0.0f, 4.0f, 2.0f,
 802             8.4f, 1.0f, -1.0f,
 803
 804             0.0f, 0.0f, 2.0f,
 805             0.9f, 2.0f, 7.0f,
 806         }));
 807
 808     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
 809     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
 810     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 811
 812     armnn::AdditionQueueDescriptor data;
 813     armnn::WorkloadInfo info;
 814     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
 815     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
 816     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 817
 818     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
 819
 820     inputHandle1->Allocate();
 821     inputHandle2->Allocate();
 822     outputHandle->Allocate();
 823
 824     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
 825     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
 826
 827     workloadFactory.Finalize();
 828     workload->Execute();
 829
 830     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
 831
 832     return ret;
 833 }
 834
 835 template <typename T>
 836 LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory,
 837     float qScale,
 838     int32_t qOffset)
 839 {
 840     armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType<T>());
 841     armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType<T>());
 842     armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());
 843
 844     if (armnn::IsQuantizedType<T>())
 845     {
 846         inputTensorInfo1.SetQuantizationScale(qScale);
 847         inputTensorInfo1.SetQuantizationOffset(qOffset);
 848         inputTensorInfo2.SetQuantizationScale(qScale);
 849         inputTensorInfo2.SetQuantizationOffset(qOffset);
 850         outputTensorInfo.SetQuantizationScale(qScale);
 851         outputTensorInfo.SetQuantizationOffset(qOffset);
 852     }
 853
 854     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
 855         {
 856             0.0f,
 857             1.0f,
 858
 859             2.0f,
 860             3.0f,
 861
 862             4.0f,
 863             5.0f,
 864         }));
 865
 866     auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
 867         {
 868             0.5f, 1.5f, 2.5f,
 869             3.5f, 4.5f, 5.5f,
 870         }));
 871
 872     LayerTestResult<T,4> ret(outputTensorInfo);
 873     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
 874         {
 875             0.5f, 1.5f, 2.5f,
 876             4.5f, 5.5f, 6.5f,
 877
 878             2.5f, 3.5f, 4.5f,
 879             6.5f, 7.5f, 8.5f,
 880
 881             4.5f, 5.5f, 6.5f,
 882             8.5f, 9.5f, 10.5f,
 883         }));
 884
 885     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
 886     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
 887     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 888
 889     armnn::AdditionQueueDescriptor data;
 890     armnn::WorkloadInfo info;
 891     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
 892     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
 893     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 894
 895     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
 896
 897     inputHandle1->Allocate();
 898     inputHandle2->Allocate();
 899     outputHandle->Allocate();
 900
 901     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
 902     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
 903
 904     workloadFactory.Finalize();
 905     workload->Execute();
 906
 907     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
 908
 909     return ret;
 910 }
 911
 912 template <typename T>
 913 LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory,
 914     float qScale,
 915     int32_t qOffset)
 916 {
 917     armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());
 918     armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType<T>());
 919     armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());
 920
 921     if (armnn::IsQuantizedType<T>())
 922     {
 923         inputTensorInfo1.SetQuantizationScale(qScale);
 924         inputTensorInfo1.SetQuantizationOffset(qOffset);
 925         inputTensorInfo2.SetQuantizationScale(qScale);
 926         inputTensorInfo2.SetQuantizationOffset(qOffset);
 927         outputTensorInfo.SetQuantizationScale(qScale);
 928         outputTensorInfo.SetQuantizationOffset(qOffset);
 929     }
 930
 931     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
 932         {
 933              0.0f,  1.0f,  2.0f,
 934              3.0f,  4.0f,  5.0f,
 935              6.0f,  7.0f,  8.0f,
 936              9.0f, 10.0f, 11.0f,
 937             12.0f, 13.0f, 14.0f,
 938             15.0f, 16.0f, 17.0f,
 939         }));
 940
 941     auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
 942         {
 943             0.5f,
 944         }));
 945
 946     LayerTestResult<T,4> ret(outputTensorInfo);
 947     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
 948         {
 949              0.5f,  1.5f,  2.5f,
 950              3.5f,  4.5f,  5.5f,
 951              6.5f,  7.5f,  8.5f,
 952              9.5f, 10.5f, 11.5f,
 953             12.5f, 13.5f, 14.5f,
 954             15.5f, 16.5f, 17.5f,
 955         }));
 956
 957     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
 958     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
 959     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 960
 961     armnn::AdditionQueueDescriptor data;
 962     armnn::WorkloadInfo info;
 963     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
 964     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
 965     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 966
 967     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
 968
 969     inputHandle1->Allocate();
 970     inputHandle2->Allocate();
 971     outputHandle->Allocate();
 972
 973     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
 974     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
 975
 976     workloadFactory.Finalize();
 977     workload->Execute();
 978
 979     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
 980
 981     return ret;
 982 }
 983
 984 LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory)
 985 {
 986     return AdditionBroadcastTestImpl<float>(workloadFactory, 0.0f, 0);
 987 }
 988
 989 LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory)
 990 {
 991     return AdditionBroadcastTestImpl<uint8_t>(workloadFactory, 2.f, 0);
 992 }
 993
 994 LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
 995 {
 996     return AdditionBroadcast1ElementTestImpl<float>(workloadFactory, 0.0f, 0);
 997 }
 998
 999 LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
1000 {
1001     return AdditionBroadcast1ElementTestImpl<uint8_t>(workloadFactory, 0.1333333f, 128);
1002 }
1003
1004 LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory,
1005                                     armnn::IWorkloadFactory& refWorkloadFactory)
1006 {
1007     unsigned int batchSize = 4;
1008     unsigned int channels  = 1;
1009     unsigned int height    = 2;
1010     unsigned int width     = 3;
1011
1012     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
1013     armnn::TensorInfo outputTensorInfo;
1014
1015     unsigned int shape[] = {batchSize, channels, height, width};
1016
1017     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1018     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1019     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1020
1021     auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232);
1022     auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456);
1023
1024     LayerTestResult<float,4> ret(outputTensorInfo);
1025
1026     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
1027     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
1028     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1029
1030     std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
1031     std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2);
1032     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1033
1034     armnn::AdditionQueueDescriptor data;
1035     armnn::WorkloadInfo info;
1036     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
1037     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
1038     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1039
1040     armnn::AdditionQueueDescriptor refData = data;
1041     armnn::WorkloadInfo refInfo = info;
1042     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get());
1043     SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get());
1044     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1045
1046     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
1047     std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo);
1048
1049     inputHandle1->Allocate();
1050     inputHandle2->Allocate();
1051     outputHandle->Allocate();
1052     inputHandle1Ref->Allocate();
1053     inputHandle2Ref->Allocate();
1054     outputHandleRef->Allocate();
1055
1056     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
1057     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
1058     CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
1059     CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]);
1060
1061     workloadFactory.Finalize();
1062     workload->Execute();
1063     refWorkloadFactory.Finalize();
1064     workloadRef->Execute();
1065
1066     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1067     CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1068
1069     return ret;
1070 }
1071
1072 namespace {
1073 LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory,
1074                                                   const unsigned int shape0[4],
1075                                                   const std::vector<float> & values0,
1076                                                   const unsigned int shape1[4],
1077                                                   const std::vector<float> & values1,
1078                                                   const unsigned int outShape[4],
1079                                                   const std::vector<float> & outValues)
1080 {
1081     const size_t dimensionCount = 4;
1082     armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
1083     armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
1084     armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
1085
1086     auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
1087     auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
1088
1089     LayerTestResult<float,4> ret(outputTensorInfo);
1090
1091     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
1092     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
1093     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1094
1095     armnn::MultiplicationQueueDescriptor data;
1096     armnn::WorkloadInfo info;
1097     AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
1098     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
1099     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1100
1101     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
1102
1103     inputHandle0->Allocate();
1104     inputHandle1->Allocate();
1105     outputHandle->Allocate();
1106
1107     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
1108     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
1109
1110     workloadFactory.Finalize();
1111     workload->Execute();
1112
1113     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1114
1115     ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
1116     return ret;
1117 }
1118 } // anonymous namespace
1119
1120
1121 LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
1122 {
1123     const unsigned int width = 2;
1124     const unsigned int height = 2;
1125     const unsigned int channelCount = 2;
1126     const unsigned int batchSize = 2;
1127
1128     unsigned int shape[] = { batchSize, channelCount, height, width };
1129
1130     std::vector<float> input0({
1131         1,  1,  1,  1,    2,  2,  2,  2,
1132         3,  3,  3,  3,    4,  4,  4,  4 });
1133
1134     std::vector<float> input1({
1135         2,  2,  2,  2,    3,  3,  3,  3,
1136         4,  4,  4,  4,    5,  5,  5,  5 });
1137
1138     std::vector<float> output({
1139         2,  2,  2,  2,    6,  6,  6,  6,
1140         12, 12, 12, 12,  20, 20, 20, 20 });
1141
1142     return MultiplicationTestHelper(workloadFactory,
1143                                     shape,
1144                                     input0,
1145                                     shape,
1146                                     input1,
1147                                     shape,
1148                                     output);
1149 }
1150
1151 LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
1152 {
1153     unsigned int shape0[] = { 1, 2, 2, 2 };
1154     std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
1155
1156     unsigned int shape1[] = { 1, 1, 1, 1 };
1157     std::vector<float> input1({ 2 });
1158
1159     std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16});
1160
1161     return MultiplicationTestHelper(workloadFactory,
1162                                     shape0,
1163                                     input0,
1164                                     shape1,
1165                                     input1,
1166                                     shape0,
1167                                     output);
1168 }
1169
1170 LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
1171 {
1172     unsigned int shape0[] = { 1, 3, 3, 2 };
1173     std::vector<float> input0({
1174         1,   2,      3,  4,      5,  6,
1175         7,   8,      9, 10,     11, 12,
1176         13, 14,     15, 16,     17, 18});
1177
1178     unsigned int shape1[] = { 1, 1, 1, 2 };
1179     std::vector<float> input1({ 1, 2 });
1180
1181     std::vector<float> output({
1182         1,   4,       3,  8,      5, 12,
1183         7,   16,      9, 20,     11, 24,
1184         13,  28,     15, 32,     17, 36});
1185
1186     return MultiplicationTestHelper(workloadFactory,
1187                                     shape0,
1188                                     input0,
1189                                     shape1,
1190                                     input1,
1191                                     shape0,
1192                                     output);
1193 }
1194
1195 LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
1196                                           armnn::IWorkloadFactory& refWorkloadFactory)
1197 {
1198     const unsigned int width = 16;
1199     const unsigned int height = 32;
1200     const unsigned int channelCount = 2;
1201     const unsigned int batchSize = 5;
1202
1203     armnn::TensorInfo inputTensorInfo0;
1204     armnn::TensorInfo inputTensorInfo1;
1205     armnn::TensorInfo outputTensorInfo;
1206
1207     constexpr unsigned int shape[] = { batchSize, channelCount, height, width };
1208
1209     inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1210     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1211     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1212
1213     LayerTestResult<float,4> comparisonResult(outputTensorInfo);
1214
1215     auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992);
1216     auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257);
1217
1218     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
1219     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
1220     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1221
1222     std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0);
1223     std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
1224     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1225
1226     armnn::MultiplicationQueueDescriptor data;
1227     armnn::WorkloadInfo info;
1228     AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
1229     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
1230     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1231
1232     armnn::MultiplicationQueueDescriptor refData = data;
1233     armnn::WorkloadInfo refInfo = info;
1234     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get());
1235     SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get());
1236     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1237
1238     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
1239     std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo);
1240
1241     inputHandle0->Allocate();
1242     inputHandle1->Allocate();
1243     outputHandle->Allocate();
1244     inputHandle0Ref->Allocate();
1245     inputHandle1Ref->Allocate();
1246     outputHandleRef->Allocate();
1247
1248     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
1249     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
1250     CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]);
1251     CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
1252
1253     workloadFactory.Finalize();
1254     workload->Execute();
1255     refWorkloadFactory.Finalize();
1256     workloadRef->Execute();
1257
1258     CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get());
1259     CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get());
1260
1261     return comparisonResult;
1262 }
1263
1264 LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory,
1265                                      armnn::IWorkloadFactory& refWorkloadFactory)
1266 {
1267     const unsigned int width     = 2;
1268     const unsigned int height    = 3;
1269     const unsigned int channels  = 5;
1270     const unsigned int batchSize = 3;
1271
1272     armnn::TensorInfo inputTensorInfo;
1273     armnn::TensorInfo outputTensorInfo;
1274     armnn::TensorInfo tensorInfo;
1275
1276     constexpr unsigned int shape[]       = {batchSize, channels, height, width};
1277     constexpr unsigned int tensorShape[] = {channels};
1278
1279     inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1280     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
1281     tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32);
1282
1283     auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312);
1284
1285     auto mean     = MakeRandomTensor<float, 1>(tensorInfo, 123);
1286     auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f);
1287     auto beta     = MakeRandomTensor<float, 1>(tensorInfo, 123);
1288     auto gamma    = MakeRandomTensor<float, 1>(tensorInfo, 345);
1289
1290     LayerTestResult<float,4> ret(outputTensorInfo);
1291
1292     std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
1293     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1294
1295     std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1296     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1297
1298     armnn::BatchNormalizationQueueDescriptor data;
1299     armnn::WorkloadInfo info;
1300     armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
1301     armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
1302     armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
1303     armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
1304
1305     AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
1306     AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
1307     AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
1308     AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
1309
1310     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1311     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1312     data.m_Mean             = &meanTensor;
1313     data.m_Variance         = &varianceTensor;
1314     data.m_Beta             = &betaTensor;
1315     data.m_Gamma            = &gammaTensor;
1316     data.m_Parameters.m_Eps = 0.01f;
1317
1318     armnn::BatchNormalizationQueueDescriptor refData = data;
1319     armnn::WorkloadInfo refInfo = info;
1320     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1321     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1322
1323     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
1324     std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo);
1325
1326     inputHandle->Allocate();
1327     outputHandle->Allocate();
1328     inputHandleRef->Allocate();
1329     outputHandleRef->Allocate();
1330
1331     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1332     CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1333
1334     workloadFactory.Finalize();
1335     workload->Execute();
1336     refWorkloadFactory.Finalize();
1337     workloadRef->Execute();
1338
1339     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1340     CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1341
1342     return ret;
1343 }
1344
1345 template<typename T>
1346 void PermuteTensorData(
1347         armnn::IWorkloadFactory& workloadFactory,
1348         const armnn::PermutationVector& mappings,
1349         armnn::TensorInfo & inputTensorInfo,
1350         const T * inputData,
1351         std::vector<T>& outputData)
1352 {
1353     BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
1354     if (inputData == nullptr)
1355     {
1356         // Nullptr is an error in the test. By returning without doing the concatenation
1357         // I expect the caller to fail the test. It still makes sense to report this as
1358         // an assert for Debug builds.
1359         return;
1360     }
1361
1362     armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
1363
1364     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1365     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1366
1367     armnn::PermuteQueueDescriptor queueDescriptor;
1368     queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings};
1369     armnn::WorkloadInfo workloadInfo;
1370     AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
1371     AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
1372
1373     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);
1374
1375     inputHandle->Allocate();
1376     outputHandle->Allocate();
1377
1378     CopyDataToITensorHandle(inputHandle.get(), inputData);
1379
1380     workload->Execute();
1381
1382     outputData.resize(outputTensorInfo.GetNumElements());
1383     CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
1384     inputTensorInfo = outputTensorInfo;
1385 }
1386
1387 armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation(
1388         const std::vector<armnn::TensorInfo> & inputTensorInfos,
1389         unsigned int concatDim)
1390 {
1391     std::vector<armnn::TensorShape> shapes;
1392     shapes.reserve(inputTensorInfos.size());
1393     for (const armnn::TensorInfo& it: inputTensorInfos)
1394     {
1395         shapes.push_back(it.GetShape());
1396     }
1397
1398     return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(),
1399                                                          shapes.end(),
1400                                                          concatDim);
1401 }
1402
1403 //
1404 // Concatenation is only supported for N and C dimensions for NCHW. In case of
1405 // <4 dimensions we need to make sure that the concat dimensions are at least
1406 // the 3rd slowest iterating one.
1407 //
1408
1409 bool NeedPermuteForConcat(
1410         const std::vector<armnn::TensorInfo> & inputTensorInfos,
1411         unsigned int concatDim)
1412 {
1413     // See note above. Additionally we expect the input shapes to have the
1414     // same number of dimensions.
1415     unsigned int nDimensions = 0;
1416
1417     // Determine the number of dimensions as well as sanity check them
1418     // agains test implementation issues.
1419     for (auto && tensorInfo : inputTensorInfos)
1420     {
1421         if (!nDimensions)
1422         {
1423             nDimensions = tensorInfo.GetShape().GetNumDimensions();
1424         }
1425         else
1426         {
1427             BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
1428                 "Input shapes must have the same number of dimensions");
1429         }
1430     }
1431
1432     return (nDimensions-concatDim) < 3;
1433 }
1434
1435 armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape)
1436 {
1437     unsigned int numDims = inputShape.GetNumDimensions();
1438     if (numDims >= 3)
1439     {
1440         // Nothing to do if the inputShape has at least 3 dimensions.
1441         return inputShape;
1442     }
1443
1444     std::vector<unsigned int> newDims(size_t(3), 1u);
1445     unsigned int expandedBy = 3 - numDims;
1446     for (unsigned int i=0; i<numDims; ++i)
1447     {
1448         newDims[expandedBy+i] = inputShape[i];
1449     }
1450     return armnn::TensorShape(3u, &newDims[0]);
1451 }
1452
1453 void Generate3dPermuteVectorForConcat(
1454         unsigned int numDimensions,
1455         unsigned int & concatDim,
1456         std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations)
1457 {
1458     BOOST_ASSERT_MSG(numDimensions <= 3,
1459        "Only dimensions 1,2 and 3 are supported by this helper");
1460
1461     unsigned int expandedBy = 3 - numDimensions;
1462     unsigned int expandedConcatAxis = concatDim + expandedBy;
1463
1464     if (expandedConcatAxis == 2)
1465     {
1466         concatDim = 0;
1467         armnn::PermutationVector forwardPermutation({1, 2, 0});
1468         armnn::PermutationVector reversePermutation({2, 0, 1});
1469         permutations = std::make_pair(forwardPermutation, reversePermutation);
1470     }
1471     else if (expandedConcatAxis == 1)
1472     {
1473         concatDim = 0;
1474         armnn::PermutationVector forwardPermutation({2, 0, 1});
1475         armnn::PermutationVector reversePermutation({1, 2, 0});
1476         permutations = std::make_pair(forwardPermutation, reversePermutation);
1477     }
1478     else
1479     {
1480         BOOST_ASSERT(expandedConcatAxis == 0);
1481         concatDim = 0;
1482     }
1483 }
1484
1485 //
1486 // Permute the input tensors so we can do a supported concatenation.
1487 // Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions
1488 // at the front. Finally this function tells what the output shape
1489 // of the permuted concatenated tensor is going to be.
1490 //
1491 template <typename T>
1492 void PermuteInputsForConcat(
1493         armnn::IWorkloadFactory& workloadFactory,
1494         std::vector<armnn::TensorInfo> & inputTensorInfos,
1495         std::vector<T *> & inputData,
1496         std::vector<std::vector<T>> & inputDataStorage,
1497         armnn::PermutationVector & permuteVector,
1498         unsigned int & concatDim,
1499         armnn::TensorInfo & outputTensorInfo)
1500 {
1501     BOOST_ASSERT_MSG(inputTensorInfos.size() > 1,
1502         "Expecting more than one tensor to be concatenated here");
1503
1504     unsigned int numDims = 0;
1505     unsigned int nthInput = 0;
1506     const armnn::PermutationVector identity({0, 1, 2});
1507
1508     std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations =
1509         std::make_pair(identity, identity);
1510
1511     inputDataStorage.resize(inputData.size());
1512
1513     for (auto && tensorInfo : inputTensorInfos)
1514     {
1515         if (numDims == 0)
1516         {
1517             numDims = tensorInfo.GetShape().GetNumDimensions();
1518             Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);
1519             // Store the reverese permutation.
1520             permuteVector = permutations.second;
1521             BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity),
1522                 "Test logic error, we don't need permutation, so we shouldn't arrive here");
1523         }
1524         else
1525         {
1526             BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
1527                 "All inputs must have the same number of dimensions");
1528         }
1529
1530         armnn::TensorInfo newTensorInfo = tensorInfo;
1531         newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));
1532
1533         PermuteTensorData<T>(workloadFactory,
1534                              permutations.first,
1535                              newTensorInfo,
1536                              inputData[nthInput],
1537                              inputDataStorage[nthInput]);
1538
1539         inputData[nthInput] = inputDataStorage[nthInput].data();
1540         inputTensorInfos[nthInput] = newTensorInfo;
1541
1542         ++nthInput;
1543     }
1544
1545     outputTensorInfo.SetShape(
1546         armnnUtils::Permuted(
1547             ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
1548             permutations.first));
1549 }
1550
1551
1552 //
1553 // This is the pair of PermuteInputsForConcat(...) which permutes back
1554 // the output of the concatenation so we can check it against an expected
1555 // output.
1556 //
1557 template <typename T>
1558 void PermuteOutputForConcat(
1559         armnn::IWorkloadFactory& workloadFactory,
1560         const armnn::TensorInfo & tensorInfo,
1561         const armnn::PermutationVector & permuteVector,
1562         std::unique_ptr<armnn::ITensorHandle> && inputDataHandle,
1563         T * data)
1564 {
1565     BOOST_ASSERT_MSG(data != nullptr, "data must not be null");
1566     if (data == nullptr)
1567     {
1568         // Nullptr is an error in the test. By returning without doing the permutation
1569         // I expect the caller to fail the test. It still makes sense to report this as
1570         // an assert for Debug builds.
1571         return;
1572     }
1573
1574     armnn::TensorInfo resultTensorInfo = tensorInfo;
1575     std::vector<T> inputData(tensorInfo.GetNumElements());
1576     std::vector<T> outputData;
1577
1578     CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());
1579
1580     PermuteTensorData<T>(workloadFactory,
1581                          permuteVector,
1582                          resultTensorInfo,
1583                          &inputData[0],
1584                          outputData);
1585
1586     ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
1587 }
1588
1589 template <typename T>
1590 void Concatenate(armnn::IWorkloadFactory& workloadFactory,
1591                  std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig,
1592                  std::initializer_list<T *> inputsOrig,
1593                  const armnn::TensorInfo& outputTensorInfoOrig,
1594                  T * output,
1595                  unsigned int concatDim)
1596 {
1597     BOOST_ASSERT_MSG(output != nullptr, "output must not be null");
1598     if (output == nullptr)
1599     {
1600         // Nullptr is an error in the test. By returning without doing the permutation
1601         // I expect the caller to fail the test. It still makes sense to report this as
1602         // an assert for Debug builds.
1603         return;
1604     }
1605
1606     armnn::MergerQueueDescriptor queueDescriptor;
1607
1608     // Saves a copy of the parameters which we might need to change.
1609     std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
1610     std::vector<T *> inputs            = inputsOrig;
1611     armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig;
1612
1613     armnn::PermutationVector permuteVector{0, 1, 2};
1614
1615     // Holds and automatically releases memory for the reshaped input data.
1616     std::vector<std::vector<T>> tmpInputDataStorage;
1617
1618     const size_t inputCount = inputTensorInfos.size();
1619
1620     bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);
1621
1622     if (needPermuteForConcat)
1623     {
1624         //
1625         // We need to permute the inputs, because concatenation along
1626         // the requested axis is not supported.
1627         //
1628         PermuteInputsForConcat<T>(workloadFactory,
1629                                   inputTensorInfos,
1630                                   inputs,
1631                                   tmpInputDataStorage,
1632                                   permuteVector,
1633                                   concatDim,
1634                                   outputTensorInfo);
1635     }
1636
1637     armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim);
1638
1639     queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
1640     for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
1641     {
1642         queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
1643             viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
1644     }
1645
1646     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1647
1648     std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
1649     inputHandles.reserve(inputCount);
1650
1651     const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
1652     for (unsigned int i = 0; i < inputCount; ++i)
1653     {
1654         const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i];
1655
1656         std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ?
1657             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(),
1658                 queueDescriptor.m_ViewOrigins[i].m_Origin.data())
1659             : workloadFactory.CreateTensorHandle(inputTensorInfo);
1660
1661         inputHandles.emplace_back(std::move(inputHandle));
1662     }
1663
1664     armnn::WorkloadInfo workloadInfo;
1665
1666     for (unsigned int i = 0; i < inputCount; ++i)
1667     {
1668         AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
1669     }
1670
1671     AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
1672
1673     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo);
1674
1675     for (auto& inputHandle : inputHandles)
1676     {
1677         inputHandle->Allocate();
1678     }
1679
1680     outputHandle->Allocate();
1681
1682     unsigned int nextInputId = 0;
1683     for (auto& inputHandle : inputHandles)
1684     {
1685         CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
1686         ++nextInputId;
1687     }
1688
1689     workloadFactory.Finalize();
1690     workload->Execute();
1691
1692     if (needPermuteForConcat)
1693     {
1694         PermuteOutputForConcat<T>(workloadFactory,
1695                                   outputTensorInfo,
1696                                   permuteVector,
1697                                   std::move(outputHandle),
1698                                   output);
1699     }
1700     else
1701     {
1702         CopyDataFromITensorHandle(output, outputHandle.get());
1703     }
1704 }
1705
1706 template <typename T>
1707 LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
1708 {
1709     armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType<T>());
1710
1711     auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f }));
1712     auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f }));
1713     auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f }));
1714
1715     armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType<T>());
1716
1717     LayerTestResult<T, 1> result(outputTensorInfo);
1718
1719     std::vector<T> output;
1720     output.resize(outputTensorInfo.GetNumElements());
1721     Concatenate<T>(workloadFactory,
1722         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
1723         { input0.data(), input1.data(), input2.data() },
1724         outputTensorInfo,
1725         output.data(),
1726         0);
1727
1728     result.output = MakeTensor<T, 1>(outputTensorInfo, output);
1729     result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1730         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
1731     }));
1732
1733     return result;
1734 }
1735
1736 LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory)
1737 {
1738     return Concatenation1dTestImpl<float>(workloadFactory, 0.0f, 0);
1739 }
1740
1741 template <typename T>
1742 LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
1743     const armnn::TensorInfo& outputTensorInfo,
1744     unsigned int dimension,
1745     const float qScale,
1746     const int32_t qOffset)
1747 {
1748     armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType<T>());
1749
1750     auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1751         // Batch 0
1752         1.0f, 2.0f, 3.0f,
1753
1754         // Batch 1
1755         10.0f, 11.0f, 12.0f,
1756     }));
1757
1758     auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1759         // Batch 0
1760         4.0f, 5.0f, 6.0f,
1761
1762         // Batch 1
1763         13.0f, 14.0f, 15.0f,
1764     }));
1765
1766     auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1767         // Batch 0
1768         7.0f, 8.0f, 9.0f,
1769
1770         // Batch 1
1771         16.0f, 17.0f, 18.0f,
1772     }));
1773
1774     LayerTestResult<T, 2> result(outputTensorInfo);
1775
1776     std::vector<T> output;
1777     output.resize(outputTensorInfo.GetNumElements());
1778     Concatenate<T>(workloadFactory,
1779         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
1780         { input0.data(), input1.data(), input2.data() },
1781         outputTensorInfo,
1782         output.data(),
1783         dimension);
1784
1785     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
1786     return result;
1787 }
1788
1789 template <typename T>
1790 LayerTestResult<T, 2> Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory,
1791     float qScale, int32_t qOffset)
1792 {
1793     armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>());
1794
1795     LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 0, qScale, qOffset);
1796     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1797         // Batch 0
1798         1.0f, 2.0f, 3.0f,
1799
1800         // Batch 1
1801         10.0f, 11.0f, 12.0f,
1802
1803         // Batch 2
1804         4.0f, 5.0f, 6.0f,
1805
1806         // Batch 3
1807         13.0f, 14.0f, 15.0f,
1808
1809         // Batch 4
1810         7.0f, 8.0f, 9.0f,
1811
1812         // Batch 5
1813         16.0f, 17.0f, 18.0f,
1814     }));
1815
1816     return result;
1817 }
1818
1819 LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory)
1820 {
1821     return Concatenation2dDim0TestImpl<float>(workloadFactory, 0.0f, 0);
1822 }
1823
1824 template <typename T>
1825 LayerTestResult<T, 2> Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory,
1826     float qScale, int32_t qOffset)
1827 {
1828     armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>());
1829
1830     LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset);
1831     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1832         // Batch 0
1833         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
1834
1835         // Batch 1
1836         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
1837     }));
1838
1839     return result;
1840 }
1841
1842 LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory)
1843 {
1844     return Concatenation2dDim1TestImpl<float>(workloadFactory, 0.0f, 0);
1845 }
1846
1847 template <typename T>
1848 LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1849     int32_t qOffset)
1850 {
1851     armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>());
1852     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1853         // Batch 0
1854         1.0f, 2.0f, 3.0f,
1855
1856         // Batch 1
1857         10.0f, 11.0f, 12.0f,
1858     }));
1859
1860     armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType<T>());
1861     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1862         // Batch 0
1863         4.0f, 5.0f, 6.0f,
1864
1865         // Batch 1
1866         13.0f, 14.0f, 15.0f,
1867
1868         // Batch 0
1869         7.0f, 8.0f, 9.0f,
1870     }));
1871
1872     armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType<T>());
1873     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1874         // Batch 1
1875         16.0f, 17.0f, 18.0f,
1876     }));
1877
1878     armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>());
1879     LayerTestResult<T, 2> result(outputTensorInfo);
1880
1881     std::vector<T> output;
1882     output.resize(outputTensorInfo.GetNumElements());
1883     Concatenate<T>(workloadFactory,
1884         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1885         { input0.data(), input1.data(), input2.data() },
1886         outputTensorInfo,
1887         output.data(),
1888         0);
1889
1890     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
1891     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1892         // Batch 0
1893         1.0f, 2.0f, 3.0f,
1894
1895         // Batch 1
1896         10.0f, 11.0f, 12.0f,
1897
1898         // Batch 2
1899         4.0f, 5.0f, 6.0f,
1900
1901         // Batch 3
1902         13.0f, 14.0f, 15.0f,
1903
1904         // Batch 4
1905         7.0f, 8.0f, 9.0f,
1906
1907         // Batch 5
1908         16.0f, 17.0f, 18.0f,
1909     }));
1910
1911     return result;
1912 }
1913
1914 LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
1915 {
1916     return Concatenation2dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
1917 }
1918
1919 template <typename T>
1920 LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1921     int32_t qOffset)
1922 {
1923     armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>());
1924     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1925         // Batch 0
1926         1.0f, 2.0f, 3.0f,
1927
1928         // Batch 1
1929         10.0f, 11.0f, 12.0f,
1930     }));
1931
1932     armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType<T>());
1933     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1934         // Batch 0
1935         4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
1936
1937         // Batch 1
1938         13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
1939     }));
1940
1941     armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType<T>());
1942     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1943         // Batch 0
1944         9.0f,
1945
1946         // Batch 1
1947         18.0f
1948     }));
1949
1950     armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>());
1951     LayerTestResult<T, 2> result(outputTensorInfo);
1952
1953     std::vector<T> output;
1954     output.resize(outputTensorInfo.GetNumElements());
1955     Concatenate<T>(workloadFactory,
1956         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1957         { input0.data(), input1.data(), input2.data() },
1958         outputTensorInfo,
1959         output.data(),
1960         1);
1961
1962     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
1963     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1964         // Batch 0
1965         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
1966
1967         // Batch 1
1968         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
1969     }));
1970
1971     return result;
1972 }
1973
1974 LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
1975 {
1976     return Concatenation2dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
1977 }
1978
1979 template <typename T>
1980 LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory,
1981     const armnn::TensorInfo& outputTensorInfo,
1982     unsigned int dimension,
1983     float qScale,
1984     int32_t qOffset)
1985 {
1986     armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
1987
1988     auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1989         // Batch 0, Channel 0
1990         1.0f, 2.0f,
1991
1992         // Batch 0, Channel 1
1993         3.0f, 4.0f,
1994
1995         // Batch 0, Channel 2
1996         5.0f, 6.0f,
1997
1998         // Batch 1, Channel 0
1999         19.0f, 20.0f,
2000
2001         // Batch 1, Channel 1
2002         21.0f, 22.0f,
2003
2004         // Batch 1, Channel 2
2005         23.0f, 24.0f
2006     }));
2007
2008     auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2009         // Batch 0, Channel 0
2010         7.0f, 8.0f,
2011
2012         // Batch 0, Channel 1
2013         9.0f, 10.0f,
2014
2015         // Batch 0, Channel 2
2016         11.0f, 12.0f,
2017
2018         // Batch 1, Channel 0
2019         25.0f, 26.0f,
2020
2021         // Batch 1, Channel 1
2022         27.0f, 28.0f,
2023
2024         // Batch 1, Channel 2
2025         29.0f, 30.0f
2026     }));
2027
2028     auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2029         // Batch 0, Channel 0
2030         13.0f, 14.0f,
2031
2032         // Batch 0, Channel 1
2033         15.0f, 16.0f,
2034
2035         // Batch 0, Channel 2
2036         17.0f, 18.0f,
2037
2038         // Batch 1, Channel 0
2039         31.0f, 32.0f,
2040
2041         // Batch 1, Channel 1
2042         33.0f, 34.0f,
2043
2044         // Batch 1, Channel 2
2045         35.0f, 36.0f
2046     }));
2047
2048     LayerTestResult<T, 3> result(outputTensorInfo);
2049
2050     std::vector<T> output;
2051     output.resize(outputTensorInfo.GetNumElements());
2052     Concatenate<T>(workloadFactory,
2053         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
2054         { input0.data(), input1.data(), input2.data() },
2055         outputTensorInfo,
2056         output.data(),
2057         dimension);
2058
2059     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2060     return result;
2061 }
2062
2063 template <typename T>
2064 LayerTestResult<T, 3> Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2065     int32_t qOffset)
2066 {
2067     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>());
2068
2069     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 0,
2070         qScale, qOffset);
2071     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2072         // Batch 0, Channel 0
2073         1.0f, 2.0f,
2074
2075         // Batch 0, Channel 1
2076         3.0f, 4.0f,
2077
2078         // Batch 0, Channel 2
2079         5.0f, 6.0f,
2080
2081         // Batch 1, Channel 0
2082         19.0f, 20.0f,
2083
2084         // Batch 1, Channel 1
2085         21.0f, 22.0f,
2086
2087         // Batch 1, Channel 2
2088         23.0f, 24.0f,
2089
2090         // Batch 2, Channel 0
2091         7.0f, 8.0f,
2092
2093         // Batch 2, Channel 1
2094         9.0f, 10.0f,
2095
2096         // Batch 2, Channel 2
2097         11.0f, 12.0f,
2098
2099         // Batch 3, Channel 0
2100         25.0f, 26.0f,
2101
2102         // Batch 3, Channel 1
2103         27.0f, 28.0f,
2104
2105         // Batch 3, Channel 2
2106         29.0f, 30.0f,
2107
2108         // Batch 4, Channel 0
2109         13.0f, 14.0f,
2110
2111         // Batch 4, Channel 1
2112         15.0f, 16.0f,
2113
2114         // Batch 4, Channel 2
2115         17.0f, 18.0f,
2116
2117         // Batch 5, Channel 0
2118         31.0f, 32.0f,
2119
2120         // Batch 5, Channel 1
2121         33.0f, 34.0f,
2122
2123         // Batch 5, Channel 2
2124         35.0f, 36.0f
2125     }));
2126     return result;
2127 }
2128
2129 LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory)
2130 {
2131     return Concatenation3dDim0TestImpl<float>(workloadFactory, 0.0f, 0);
2132 }
2133
2134 template <typename T>
2135 LayerTestResult<T, 3> Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory,
2136     float qScale, int32_t qOffset)
2137 {
2138     armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType<T>());
2139
2140     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset);
2141     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2142         // Batch 0, Channel 0
2143         1.0f, 2.0f,
2144
2145         // Batch 0, Channel 1
2146         3.0f, 4.0f,
2147
2148         // Batch 0, Channel 2
2149         5.0f, 6.0f,
2150
2151         // Batch 0, Channel 3
2152         7.0f, 8.0f,
2153
2154         // Batch 0, Channel 4
2155         9.0f, 10.0f,
2156
2157         // Batch 0, Channel 5
2158         11.0f, 12.0f,
2159
2160         // Batch 0, Channel 6
2161         13.0f, 14.0f,
2162
2163         // Batch 0, Channel 7
2164         15.0f, 16.0f,
2165
2166         // Batch 0, Channel 8
2167         17.0f, 18.0f,
2168
2169         // Batch 1, Channel 0
2170         19.0f, 20.0f,
2171
2172         // Batch 1, Channel 1
2173         21.0f, 22.0f,
2174
2175         // Batch 1, Channel 2
2176         23.0f, 24.0f,
2177
2178         // Batch 1, Channel 3
2179         25.0f, 26.0f,
2180
2181         // Batch 1, Channel 4
2182         27.0f, 28.0f,
2183
2184         // Batch 1, Channel 5
2185         29.0f, 30.0f,
2186
2187         // Batch 1, Channel 6
2188         31.0f, 32.0f,
2189
2190         // Batch 1, Channel 7
2191         33.0f, 34.0f,
2192
2193         // Batch 1, Channel 8
2194         35.0f, 36.0f
2195     }));
2196
2197     return result;
2198 }
2199
2200 LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory)
2201 {
2202     return Concatenation3dDim1TestImpl<float>(workloadFactory, 0.0f, 0);
2203 }
2204
2205 template <typename T>
2206 LayerTestResult<T, 3> Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory,
2207     float qScale, int32_t qOffset)
2208 {
2209     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>());
2210
2211     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 2, qScale, qOffset);
2212     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2213         // Batch 0, Channel 0
2214         1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
2215
2216         // Batch 0, Channel 1
2217         3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
2218
2219         // Batch 0, Channel 2
2220         5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
2221
2222         // Batch 1, Channel 0
2223         19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
2224
2225         // Batch 1, Channel 1
2226         21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
2227
2228         // Batch 1, Channel 2
2229         23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
2230     }));
2231
2232     return result;
2233 }
2234
2235 LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory)
2236 {
2237     return Concatenation3dDim2TestImpl<float>(workloadFactory, 0.0f, 0);
2238 }
2239
2240 template <typename T>
2241 LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2242     int32_t qOffset)
2243 {
2244     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2245     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2246             // Batch 0, Channel 0
2247             1.0f, 2.0f,
2248
2249             // Batch 0, Channel 1
2250             3.0f, 4.0f,
2251
2252             // Batch 0, Channel 2
2253             5.0f, 6.0f,
2254
2255             // Batch 1, Channel 0
2256             19.0f, 20.0f,
2257
2258             // Batch 1, Channel 1
2259             21.0f, 22.0f,
2260
2261             // Batch 1, Channel 2
2262             23.0f, 24.0f
2263     }));
2264
2265     armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType<T>());
2266     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2267             // Batch 0, Channel 0
2268             7.0f, 8.0f,
2269
2270             // Batch 0, Channel 1
2271             9.0f, 10.0f,
2272
2273             // Batch 0, Channel 2
2274             11.0f, 12.0f,
2275     }));
2276
2277     armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType<T>());
2278     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2279             // Batch 0, Channel 0
2280             25.0f, 26.0f,
2281
2282             // Batch 0, Channel 1
2283             27.0f, 28.0f,
2284
2285             // Batch 0, Channel 2
2286             29.0f, 30.0f,
2287
2288             // Batch 1, Channel 0
2289             13.0f, 14.0f,
2290
2291             // Batch 1, Channel 1
2292             15.0f, 16.0f,
2293
2294             // Batch 1, Channel 2
2295             17.0f, 18.0f,
2296
2297             // Batch 2, Channel 0
2298             31.0f, 32.0f,
2299
2300             // Batch 2, Channel 1
2301             33.0f, 34.0f,
2302
2303             // Batch 2, Channel 2
2304             35.0f, 36.0f
2305     }));
2306
2307     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>());
2308     LayerTestResult<T, 3> result(outputTensorInfo);
2309
2310     std::vector<T> output;
2311     output.resize(outputTensorInfo.GetNumElements());
2312     Concatenate<T>(workloadFactory,
2313         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2314         { input0.data(), input1.data(), input2.data() },
2315         outputTensorInfo,
2316         output.data(),
2317         0);
2318
2319     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2320     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2321         // Batch 0, Channel 0
2322         1.0f, 2.0f,
2323
2324         // Batch 0, Channel 1
2325         3.0f, 4.0f,
2326
2327         // Batch 0, Channel 2
2328         5.0f, 6.0f,
2329
2330         // Batch 1, Channel 0
2331         19.0f, 20.0f,
2332
2333         // Batch 1, Channel 1
2334         21.0f, 22.0f,
2335
2336         // Batch 1, Channel 2
2337         23.0f, 24.0f,
2338
2339         // Batch 2, Channel 0
2340         7.0f, 8.0f,
2341
2342         // Batch 2, Channel 1
2343         9.0f, 10.0f,
2344
2345         // Batch 2, Channel 2
2346         11.0f, 12.0f,
2347
2348         // Batch 3, Channel 0
2349         25.0f, 26.0f,
2350
2351         // Batch 3, Channel 1
2352         27.0f, 28.0f,
2353
2354         // Batch 3, Channel 2
2355         29.0f, 30.0f,
2356
2357         // Batch 4, Channel 0
2358         13.0f, 14.0f,
2359
2360         // Batch 4, Channel 1
2361         15.0f, 16.0f,
2362
2363         // Batch 4, Channel 2
2364         17.0f, 18.0f,
2365
2366         // Batch 5, Channel 0
2367         31.0f, 32.0f,
2368
2369         // Batch 5, Channel 1
2370         33.0f, 34.0f,
2371
2372         // Batch 5, Channel 2
2373         35.0f, 36.0f
2374     }));
2375
2376     return result;
2377 }
2378
2379 LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2380 {
2381     return Concatenation3dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2382 }
2383
2384 template <typename T>
2385 LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2386     int32_t qOffset)
2387 {
2388     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2389     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2390         // Batch 0, Channel 0
2391         1.0f, 2.0f,
2392
2393         // Batch 0, Channel 1
2394         3.0f, 4.0f,
2395
2396         // Batch 0, Channel 2
2397         5.0f, 6.0f,
2398
2399         // Batch 1, Channel 0
2400         19.0f, 20.0f,
2401
2402         // Batch 1, Channel 1
2403         21.0f, 22.0f,
2404
2405         // Batch 1, Channel 2
2406         23.0f, 24.0f
2407     }));
2408
2409     armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType<T>());
2410     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2411         // Batch 0, Channel 0
2412         7.0f, 8.0f,
2413
2414         // Batch 0, Channel 1
2415         9.0f, 10.0f,
2416
2417         // Batch 0, Channel 2
2418         11.0f, 12.0f,
2419
2420         // Batch 0, Channel 3
2421         25.0f, 26.0f,
2422
2423         // Batch 1, Channel 0
2424         27.0f, 28.0f,
2425
2426         // Batch 1, Channel 1
2427         29.0f, 30.0f,
2428
2429         // Batch 1, Channel 2
2430         13.0f, 14.0f,
2431
2432         // Batch 1, Channel 3
2433         15.0f, 16.0f,
2434     }));
2435
2436     armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType<T>());
2437     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2438         // Batch 0, Channel 0
2439         17.0f, 18.0f,
2440
2441         // Batch 1, Channel 0
2442         31.0f, 32.0f,
2443     }));
2444
2445     armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType<T>());
2446     LayerTestResult<T, 3> result(outputTensorInfo);
2447
2448     std::vector<T> output;
2449     output.resize(outputTensorInfo.GetNumElements());
2450     Concatenate<T>(workloadFactory,
2451         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2452         { input0.data(), input1.data(), input2.data() },
2453         outputTensorInfo,
2454         output.data(),
2455         1);
2456
2457     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2458     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2459         // Batch 0, Channel 0
2460         1.0f, 2.0f,
2461
2462         // Batch 0, Channel 1
2463         3.0f, 4.0f,
2464
2465         // Batch 0, Channel 2
2466         5.0f, 6.0f,
2467
2468         // Batch 0, Channel 3
2469         7.0f, 8.0f,
2470
2471         // Batch 0, Channel 4
2472         9.0f, 10.0f,
2473
2474         // Batch 0, Channel 5
2475         11.0f, 12.0f,
2476
2477         // Batch 0, Channel 6
2478         25.0f, 26.0f,
2479
2480         // Batch 0, Channel 7
2481         17.0f, 18.0f,
2482
2483         // Batch 1, Channel 0
2484         19.0f, 20.0f,
2485
2486         // Batch 1, Channel 1
2487         21.0f, 22.0f,
2488
2489         // Batch 1, Channel 2
2490         23.0f, 24.0f,
2491
2492         // Batch 1, Channel 3
2493         27.0f, 28.0f,
2494
2495         // Batch 1, Channel 4
2496         29.0f, 30.0f,
2497
2498         // Batch 1, Channel 5
2499         13.0f, 14.0f,
2500
2501         // Batch 1, Channel 6
2502         15.0f, 16.0f,
2503
2504         // Batch 1, Channel 7
2505         31.0f, 32.0f,
2506     }));
2507
2508     return result;
2509 }
2510
2511 LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2512 {
2513     return Concatenation3dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2514 }
2515
2516 template <typename T>
2517 LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2518     int32_t qOffset)
2519 {
2520     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2521     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2522         // Batch 0, Channel 0
2523         1.0f, 2.0f,
2524
2525         // Batch 0, Channel 1
2526         3.0f, 4.0f,
2527
2528         // Batch 0, Channel 2
2529         5.0f, 6.0f,
2530
2531         // Batch 1, Channel 0
2532         19.0f, 20.0f,
2533
2534         // Batch 1, Channel 1
2535         21.0f, 22.0f,
2536
2537         // Batch 1, Channel 2
2538         23.0f, 24.0f
2539     }));
2540
2541     armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType<T>());
2542     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2543         // Batch 0, Channel 0
2544         7.0f,
2545
2546         // Batch 0, Channel 1
2547         9.0f,
2548
2549         // Batch 0, Channel 2
2550         11.0f,
2551
2552         // Batch 1, Channel 0
2553         25.0f,
2554
2555         // Batch 1, Channel 1
2556         27.0f,
2557
2558         // Batch 1, Channel 2
2559         29.0f
2560     }));
2561
2562     armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType<T>());
2563     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2564         // Batch 0, Channel 0
2565         13.0f, 14.0f, 50.0f,
2566
2567         // Batch 0, Channel 1
2568         15.0f, 16.0f, 51.0f,
2569
2570         // Batch 0, Channel 2
2571         17.0f, 18.0f, 52.0f,
2572
2573         // Batch 1, Channel 0
2574         31.0f, 32.0f, 53.0f,
2575
2576         // Batch 1, Channel 1
2577         33.0f, 34.0f, 54.0f,
2578
2579         // Batch 1, Channel 2
2580         35.0f, 36.0f, 55.0f,
2581     }));
2582
2583     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>());
2584     LayerTestResult<T, 3> result(outputTensorInfo);
2585
2586     std::vector<T> output;
2587     output.resize(outputTensorInfo.GetNumElements());
2588     Concatenate<T>(workloadFactory,
2589         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2590         { input0.data(), input1.data(), input2.data() },
2591         outputTensorInfo,
2592         output.data(),
2593         2);
2594
2595     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2596     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2597         // Batch 0, Channel 0
2598         1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
2599
2600         // Batch 0, Channel 1
2601         3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
2602
2603         // Batch 0, Channel 2
2604         5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
2605
2606         // Batch 1, Channel 0
2607         19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
2608
2609         // Batch 1, Channel 1
2610         21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
2611
2612         // Batch 1, Channel 2
2613         23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
2614     }));
2615
2616     return result;
2617 }
2618
2619 LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2620 {
2621     return Concatenation3dDim2DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2622 }
2623
2624 LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory)
2625 {
2626     constexpr unsigned int inputWidth = 4;
2627     constexpr unsigned int inputHeight = 4;
2628     constexpr unsigned int inputChannels = 1;
2629     constexpr unsigned int inputBatchSize = 1;
2630
2631     constexpr unsigned int outputWidth = inputWidth;
2632     constexpr unsigned int outputHeight = inputHeight;
2633     constexpr unsigned int outputChannels = inputChannels;
2634     constexpr unsigned int outputBatchSize = inputBatchSize;
2635
2636     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2637         armnn::DataType::Float32);
2638     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2639         armnn::DataType::Float32);
2640
2641     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2642         1.0f, 2.0f, 3.0f, 4.0f,
2643         2.0f, 3.0f, 4.0f, 5.0f,
2644         3.0f, 4.0f, 5.0f, 6.0f,
2645         4.0f, 5.0f, 6.0f, 7.0f
2646     }));
2647
2648     LayerTestResult<float, 4> result(outputTensorInfo);
2649     result.outputExpected = input;
2650
2651     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2652     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2653
2654     armnn::ResizeBilinearQueueDescriptor descriptor;
2655     armnn::WorkloadInfo info;
2656     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2657     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2658
2659     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2660
2661     inputHandle->Allocate();
2662     outputHandle->Allocate();
2663     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2664
2665     workloadFactory.Finalize();
2666     workload->Execute();
2667
2668     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2669     return result;
2670 }
2671
2672 LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory)
2673 {
2674     constexpr unsigned int inputWidth = 2;
2675     constexpr unsigned int inputHeight = 2;
2676     constexpr unsigned int inputChannels = 1;
2677     constexpr unsigned int inputBatchSize = 1;
2678
2679     constexpr unsigned int outputWidth = inputWidth / 2;
2680     constexpr unsigned int outputHeight = inputHeight / 2;
2681     constexpr unsigned int outputChannels = inputChannels;
2682     constexpr unsigned int outputBatchSize = inputBatchSize;
2683
2684     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2685         armnn::DataType::Float32);
2686     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2687         armnn::DataType::Float32);
2688
2689     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2690         1.0f, 255.0f,
2691         200.0f, 250.f,
2692     }));
2693
2694     // The 'resize bilinear' operation projects the top-left corner of output texels into the input image,
2695     // then figures out the interpolants and weights. Note this is different to projecting the centre of the
2696     // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value
2697     // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting
2698     // the centre).
2699     LayerTestResult<float, 4> result(outputTensorInfo);
2700     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2701         1.0f
2702     }));
2703
2704     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2705     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2706
2707     armnn::ResizeBilinearQueueDescriptor descriptor;
2708     armnn::WorkloadInfo info;
2709     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2710     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2711
2712     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2713
2714     inputHandle->Allocate();
2715     outputHandle->Allocate();
2716     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2717
2718     workloadFactory.Finalize();
2719     workload->Execute();
2720
2721     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2722     return result;
2723 }
2724
2725 LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory)
2726 {
2727     constexpr unsigned int inputWidth = 4;
2728     constexpr unsigned int inputHeight = 4;
2729     constexpr unsigned int inputChannels = 1;
2730     constexpr unsigned int inputBatchSize = 1;
2731
2732     constexpr unsigned int outputWidth = inputWidth / 2;
2733     constexpr unsigned int outputHeight = inputHeight / 2;
2734     constexpr unsigned int outputChannels = inputChannels;
2735     constexpr unsigned int outputBatchSize = inputBatchSize;
2736
2737     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2738         armnn::DataType::Float32);
2739     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2740         armnn::DataType::Float32);
2741
2742     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2743         1.0f, 2.0f, 3.0f, 4.0f,
2744         2.0f, 3.0f, 4.0f, 5.0f,
2745         3.0f, 4.0f, 5.0f, 6.0f,
2746         4.0f, 5.0f, 6.0f, 7.0f
2747     }));
2748
2749     LayerTestResult<float, 4> result(outputTensorInfo);
2750     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2751         1.f, 3.f,
2752         3.f, 5.f
2753     }));
2754
2755     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2756     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2757
2758     armnn::ResizeBilinearQueueDescriptor descriptor;
2759     armnn::WorkloadInfo info;
2760     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2761     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2762
2763     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2764
2765     inputHandle->Allocate();
2766     outputHandle->Allocate();
2767     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2768
2769     workloadFactory.Finalize();
2770     workload->Execute();
2771
2772     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2773     return result;
2774 }
2775
2776 LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory)
2777 {
2778     constexpr unsigned int inputWidth = 5;
2779     constexpr unsigned int inputHeight = 3;
2780     constexpr unsigned int inputChannels = 1;
2781     constexpr unsigned int inputBatchSize = 1;
2782
2783     constexpr unsigned int outputWidth = 3;
2784     constexpr unsigned int outputHeight = 2;
2785     constexpr unsigned int outputChannels = inputChannels;
2786     constexpr unsigned int outputBatchSize = inputBatchSize;
2787
2788     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2789         armnn::DataType::Float32);
2790     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2791         armnn::DataType::Float32);
2792
2793     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2794           1.0f,   2.0f,   3.0f,   5.0f,   8.0f,
2795          13.0f,  21.0f,  34.0f,  55.0f,  89.0f,
2796         144.0f, 233.0f, 377.0f, 610.0f, 987.0f
2797     }));
2798
2799     LayerTestResult<float, 4> result(outputTensorInfo);
2800     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2801         1.0f, 2.6666f, 6.0f,
2802         78.5f, 179.3333f, 401.f
2803     }));
2804
2805     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2806     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2807
2808     armnn::ResizeBilinearQueueDescriptor descriptor;
2809     armnn::WorkloadInfo info;
2810     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2811     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2812
2813     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2814
2815     inputHandle->Allocate();
2816     outputHandle->Allocate();
2817     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2818
2819     workloadFactory.Finalize();
2820     workload->Execute();
2821
2822     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2823     return result;
2824 }
2825
2826 LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory)
2827 {
2828     constexpr unsigned int inputWidth = 2;
2829     constexpr unsigned int inputHeight = 3;
2830     constexpr unsigned int inputChannels = 1;
2831     constexpr unsigned int inputBatchSize = 1;
2832
2833     constexpr unsigned int outputWidth = 5;
2834     constexpr unsigned int outputHeight = 3;
2835     constexpr unsigned int outputChannels = inputChannels;
2836     constexpr unsigned int outputBatchSize = inputBatchSize;
2837
2838     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2839         armnn::DataType::Float32);
2840     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2841         armnn::DataType::Float32);
2842
2843     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2844           1.0f,   2.0f,
2845          13.0f,  21.0f,
2846         144.0f, 233.0f
2847     }));
2848
2849     LayerTestResult<float, 4> result(outputTensorInfo);
2850     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2851          1.0f,   1.4f,   1.8f,   2.f,   2.f,
2852          13.f,  16.2f,  19.4f,  21.f,  21.f,
2853         144.f, 179.6f, 215.2f, 233.f, 233.f
2854     }));
2855
2856     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2857     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2858
2859     armnn::ResizeBilinearQueueDescriptor descriptor;
2860     armnn::WorkloadInfo info;
2861     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2862     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2863
2864     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2865
2866     inputHandle->Allocate();
2867     outputHandle->Allocate();
2868     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2869
2870     workloadFactory.Finalize();
2871     workload->Execute();
2872
2873     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2874     return result;
2875 }
2876
2877 LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory)
2878 {
2879     constexpr unsigned int width = 2;
2880     constexpr unsigned int height = 3;
2881
2882     const armnn::TensorInfo tensorInfo({height, width },
2883         armnn::DataType::Float32);
2884     auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
2885        -10.0f,  -5.0f,
2886          0.0f,   5.0f,
2887         10.0f,  10.0f
2888     }));
2889
2890     LayerTestResult<float, 2> ret(tensorInfo);
2891
2892     std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(tensorInfo);
2893
2894     std::unique_ptr<armnn::ITensorHandle> outputHandle  = workloadFactory.CreateTensorHandle(tensorInfo);
2895
2896     armnn::FakeQuantizationQueueDescriptor data;
2897     armnn::WorkloadInfo info;
2898
2899     AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
2900     AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
2901     float min = -10.f;
2902     float max = 10.f;
2903
2904     data.m_Parameters.m_Min = min;
2905     data.m_Parameters.m_Max = max;
2906
2907     armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]);
2908     armnn::FakeQuantizationQueueDescriptor refData = data;
2909     armnn::WorkloadInfo refInfo = info;
2910     SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle);
2911
2912     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info);
2913
2914     inputHandle->Allocate();
2915     outputHandle->Allocate();
2916
2917     CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
2918
2919     workloadFactory.Finalize();
2920     workload->Execute();
2921
2922     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
2923
2924     ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
2925         0.0f,     63.0f,
2926         128.0f,   191.0f,
2927         255.0f,   255.0f
2928     }));
2929     return ret;
2930 }
2931
2932 LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory)
2933 {
2934     constexpr unsigned int inputWidth = 1;
2935     constexpr unsigned int inputHeight = 1;
2936     constexpr unsigned int inputChannels = 10;
2937     constexpr unsigned int inputBatchSize = 1;
2938
2939     constexpr unsigned int outputWidth = inputWidth;
2940     constexpr unsigned int outputHeight = inputHeight;
2941     constexpr unsigned int outputChannels = inputChannels;
2942     constexpr unsigned int outputBatchSize = inputBatchSize;
2943
2944     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2945         armnn::DataType::Float32);
2946     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2947         armnn::DataType::Float32);
2948
2949     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2950         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f
2951     }));
2952
2953     const float approxInvL2Norm = 0.050964719f;
2954     LayerTestResult<float, 4> result(outputTensorInfo);
2955     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2956          1.0f * approxInvL2Norm,
2957          2.0f * approxInvL2Norm,
2958          3.0f * approxInvL2Norm,
2959          4.0f * approxInvL2Norm,
2960          5.0f * approxInvL2Norm,
2961          6.0f * approxInvL2Norm,
2962          7.0f * approxInvL2Norm,
2963          8.0f * approxInvL2Norm,
2964          9.0f * approxInvL2Norm,
2965         10.0f * approxInvL2Norm
2966     }));
2967
2968     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2969     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2970
2971     armnn::L2NormalizationQueueDescriptor descriptor;
2972     armnn::WorkloadInfo info;
2973     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2974     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2975
2976     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
2977
2978     inputHandle->Allocate();
2979     outputHandle->Allocate();
2980     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2981
2982     workloadFactory.Finalize();
2983     workload->Execute();
2984
2985     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2986     return result;
2987 }
2988
2989 namespace
2990 {
2991
2992 float CalcInvL2Norm(std::initializer_list<float> elements)
2993 {
2994     const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f,
2995         [](float acc, float element) { return acc + element * element; });
2996     return 1.0f / sqrtf(reduction);
2997 }
2998
2999 }
3000
3001 LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory)
3002 {
3003     constexpr unsigned int inputWidth = 5;
3004     constexpr unsigned int inputHeight = 1;
3005     constexpr unsigned int inputChannels = 2;
3006     constexpr unsigned int inputBatchSize = 1;
3007
3008     constexpr unsigned int outputWidth = inputWidth;
3009     constexpr unsigned int outputHeight = inputHeight;
3010     constexpr unsigned int outputChannels = inputChannels;
3011     constexpr unsigned int outputBatchSize = inputBatchSize;
3012
3013     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3014         armnn::DataType::Float32);
3015     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3016         armnn::DataType::Float32);
3017
3018     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
3019         1.0f, 3.0f, 5.0f, 7.0f,  9.0f,
3020         2.0f, 4.0f, 6.0f, 8.0f, 10.0f
3021     }));
3022
3023     LayerTestResult<float, 4> result(outputTensorInfo);
3024     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
3025          1.0f * CalcInvL2Norm({ 1.0f, 2.0f }),
3026          3.0f * CalcInvL2Norm({ 3.0f, 4.0f }),
3027          5.0f * CalcInvL2Norm({ 5.0f, 6.0f }),
3028          7.0f * CalcInvL2Norm({ 7.0f, 8.0f }),
3029          9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
3030
3031          2.0f * CalcInvL2Norm({ 1.0f, 2.0f }),
3032          4.0f * CalcInvL2Norm({ 3.0f, 4.0f }),
3033          6.0f * CalcInvL2Norm({ 5.0f, 6.0f }),
3034          8.0f * CalcInvL2Norm({ 7.0f, 8.0f }),
3035         10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
3036     }));
3037
3038     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3039     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3040
3041     armnn::L2NormalizationQueueDescriptor descriptor;
3042     armnn::WorkloadInfo info;
3043     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3044     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3045
3046     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
3047
3048     inputHandle->Allocate();
3049     outputHandle->Allocate();
3050     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3051
3052     workloadFactory.Finalize();
3053     workload->Execute();
3054
3055     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3056     return result;
3057 }
3058
3059 LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory)
3060 {
3061     constexpr unsigned int inputWidth = 3;
3062     constexpr unsigned int inputHeight = 4;
3063     constexpr unsigned int inputChannels = 2;
3064     constexpr unsigned int inputBatchSize = 1;
3065
3066     constexpr unsigned int outputWidth = inputWidth;
3067     constexpr unsigned int outputHeight = inputHeight;
3068     constexpr unsigned int outputChannels = inputChannels;
3069     constexpr unsigned int outputBatchSize = inputBatchSize;
3070
3071     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3072         armnn::DataType::Float32);
3073     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3074         armnn::DataType::Float32);
3075
3076     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
3077         // Channel 0
3078         119.0f,  21.0f, 150.0f,
3079         149.0f,  32.0f, 179.0f,
3080          15.0f, 227.0f, 141.0f,
3081         147.0f, 199.0f, 220.0f,
3082
3083         // Channel 1
3084         110.0f, 140.0f,  73.0f,
3085         211.0f, 212.0f,  89.0f,
3086          24.0f, 138.0f, 188.0f,
3087         162.0f,  12.0f, 161.0f,
3088     }));
3089
3090     LayerTestResult<float, 4> result(outputTensorInfo);
3091     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
3092         119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
3093          21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
3094         150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
3095         149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
3096          32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
3097         179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
3098          15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
3099         227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
3100         141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
3101         147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
3102         199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
3103         220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
3104
3105         110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
3106         140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
3107          73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
3108         211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
3109         212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
3110          89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
3111          24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
3112         138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
3113         188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
3114         162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
3115          12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
3116         161.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
3117     }));
3118
3119     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3120     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3121
3122     armnn::L2NormalizationQueueDescriptor descriptor;
3123     armnn::WorkloadInfo info;
3124     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3125     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3126
3127     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
3128
3129     inputHandle->Allocate();
3130     outputHandle->Allocate();
3131     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3132
3133     workloadFactory.Finalize();
3134     workload->Execute();
3135
3136     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3137     return result;
3138 }
3139
3140 LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory)
3141 {
3142     constexpr unsigned int inputWidth = 3;
3143     constexpr unsigned int inputHeight = 4;
3144     constexpr unsigned int inputChannels = 3;
3145     constexpr unsigned int inputBatchSize = 2;
3146
3147     constexpr unsigned int outputWidth = inputWidth;
3148     constexpr unsigned int outputHeight = inputHeight;
3149     constexpr unsigned int outputChannels = inputChannels;
3150     constexpr unsigned int outputBatchSize = inputBatchSize;
3151
3152     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3153         armnn::DataType::Float32);
3154     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3155         armnn::DataType::Float32);
3156
3157     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
3158         // Batch 0, Channel 0
3159         235.0f,  46.0f, 178.0f,
3160         100.0f, 123.0f,  19.0f,
3161         172.0f,  74.0f, 250.0f,
3162           6.0f, 195.0f,  80.0f,
3163
3164         // Batch 0, Channel 1
3165         113.0f,  95.0f, 202.0f,
3166          77.0f, 114.0f,  71.0f,
3167         122.0f, 246.0f, 166.0f,
3168          82.0f,  28.0f,  37.0f,
3169
3170         // Batch 0, Channel 2
3171          56.0f, 170.0f, 162.0f,
3172         194.0f,  89.0f, 254.0f,
3173          12.0f, 209.0f, 200.0f,
3174           1.0f,  64.0f,  54.0f,
3175
3176         // Batch 1, Channel 0
3177          67.0f,  90.0f,  49.0f,
3178           7.0f, 163.0f,  18.0f,
3179          25.0f, 117.0f, 103.0f,
3180         247.0f,  59.0f, 189.0f,
3181
3182         // Batch 1, Channel 1
3183         239.0f, 104.0f, 199.0f,
3184          17.0f, 124.0f, 153.0f,
3185         222.0f, 217.0f, 75.0f,
3186          32.0f, 126.0f, 21.0f,
3187
3188         // Batch 1, Channel 2
3189          97.0f, 145.0f, 215.0f,
3190         115.0f, 116.0f, 238.0f,
3191         226.0f,  16.0f, 132.0f,
3192          92.0f, 125.0f,  88.0f,
3193     }));
3194
3195     LayerTestResult<float, 4> result(outputTensorInfo);
3196     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
3197
3198         // Batch 0, Channel 0
3199         235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
3200          46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
3201         178.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }),
3202         100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
3203         123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
3204          19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
3205         172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
3206          74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
3207         250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
3208           6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
3209         195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
3210          80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
3211
3212         // Batch 0, Channel 1
3213         113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
3214          95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
3215         202.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }),
3216          77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
3217         114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
3218          71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
3219         122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
3220         246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
3221         166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
3222          82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
3223          28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
3224          37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
3225
3226         // Batch 0, Channel 2
3227          56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
3228         170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
3229         162.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }),
3230         194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
3231          89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
3232         254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
3233          12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
3234         209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
3235         200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
3236           1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
3237          64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
3238          54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
3239
3240         // Batch 1, Channel 0
3241          67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
3242          90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
3243          49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
3244           7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
3245         163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
3246          18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
3247          25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
3248         117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
3249         103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
3250         247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
3251          59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
3252         189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
3253
3254         // Batch 1, Channel 1
3255         239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
3256         104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
3257         199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
3258          17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
3259         124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
3260         153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
3261         222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
3262         217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
3263          75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
3264          32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
3265         126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
3266          21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
3267
3268         // Batch 1, Channel 2
3269          97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
3270         145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
3271         215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
3272         115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
3273         116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
3274         238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
3275         226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
3276          16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
3277         132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
3278          92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
3279         125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
3280          88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
3281     }));
3282
3283     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3284     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3285
3286     armnn::L2NormalizationQueueDescriptor descriptor;
3287     armnn::WorkloadInfo info;
3288     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3289     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3290
3291     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
3292
3293     inputHandle->Allocate();
3294     outputHandle->Allocate();
3295     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3296
3297     workloadFactory.Finalize();
3298     workload->Execute();
3299
3300     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3301     return result;
3302 }
3303
3304 template <typename T>
3305 LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory,
3306     float qScale,
3307     int32_t qOffset)
3308 {
3309     constexpr unsigned int inputWidth = 3;
3310     constexpr unsigned int inputHeight = 4;
3311     constexpr unsigned int inputChannels = 3;
3312     constexpr unsigned int inputBatchSize = 2;
3313
3314     constexpr unsigned int outputWidth = inputWidth;
3315     constexpr unsigned int outputHeight = inputHeight;
3316     constexpr unsigned int outputChannels = inputChannels;
3317     constexpr unsigned int outputBatchSize = inputBatchSize;
3318
3319     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3320         armnn::GetDataType<T>());
3321
3322     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3323         armnn::GetDataType<T>());
3324
3325     // Set quantization parameters if the requested type is a quantized type.
3326     if(armnn::IsQuantizedType<T>())
3327     {
3328         inputTensorInfo.SetQuantizationScale(qScale);
3329         inputTensorInfo.SetQuantizationOffset(qOffset);
3330         outputTensorInfo.SetQuantizationScale(qScale);
3331         outputTensorInfo.SetQuantizationOffset(qOffset);
3332     }
3333
3334     auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
3335         QuantizedVector<T>(qScale, qOffset, {
3336         // Batch 0, Channel 0
3337         235.0f,  46.0f, 178.0f,
3338         100.0f, 123.0f,  19.0f,
3339         172.0f,  74.0f, 250.0f,
3340           6.0f, 195.0f,  80.0f,
3341
3342         // Batch 0, Channel 1
3343         113.0f,  95.0f, 202.0f,
3344          77.0f, 114.0f,  71.0f,
3345         122.0f, 246.0f, 166.0f,
3346          82.0f,  28.0f,  37.0f,
3347
3348         // Batch 0, Channel 2
3349          56.0f, 170.0f, 162.0f,
3350         194.0f,  89.0f, 254.0f,
3351          12.0f, 209.0f, 200.0f,
3352           1.0f,  64.0f,  54.0f,
3353
3354         // Batch 1, Channel 0
3355          67.0f,  90.0f,  49.0f,
3356           7.0f, 163.0f,  18.0f,
3357          25.0f, 117.0f, 103.0f,
3358         247.0f,  59.0f, 189.0f,
3359
3360         // Batch 1, Channel 1
3361         239.0f, 104.0f, 199.0f,
3362          17.0f, 124.0f, 153.0f,
3363         222.0f, 217.0f, 75.0f,
3364          32.0f, 126.0f, 21.0f,
3365
3366         // Batch 1, Channel 2
3367          97.0f, 145.0f, 215.0f,
3368         115.0f, 116.0f, 238.0f,
3369         226.0f,  16.0f, 132.0f,
3370          92.0f, 125.0f,  88.0f,
3371     })));
3372
3373     LayerTestResult<T, 4> result(outputTensorInfo);
3374     result.outputExpected = input;
3375
3376     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3377
3378     armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo);
3379     AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]);
3380
3381     armnn::ConstantQueueDescriptor descriptor;
3382     descriptor.m_LayerOutput = &constantTensor;
3383
3384     armnn::WorkloadInfo info;
3385     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3386
3387     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info);
3388
3389     outputHandle->Allocate();
3390
3391     workloadFactory.Finalize();
3392     workload->Execute();
3393
3394     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3395     return result;
3396 }
3397
3398 LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory)
3399 {
3400     return ConstantTestImpl<float>(workloadFactory, 0.0f, 0);
3401 }
3402
3403 LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory)
3404 {
3405     return ConstantTestImpl<uint8_t>(workloadFactory, 1.0f, 0);
3406 }
3407
3408 LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory)
3409 {
3410     unsigned int outputWidth = 3;
3411     unsigned int outputHeight = 6;
3412     unsigned int outputChannels = 3;
3413
3414     unsigned int inputWidth1 = 3;
3415     unsigned int inputHeight1 = 6;
3416     unsigned int inputChannels1 = 2;
3417
3418     unsigned int inputWidth2 = 3;
3419     unsigned int inputHeight2 = 6;
3420     unsigned int inputChannels2 = 1;
3421
3422     // Defines the tensor descriptors.
3423     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
3424     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
3425     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
3426
3427     // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize them.
3428     const float scale = 0.13497836f;
3429     const int32_t offset = -7;
3430
3431     outputTensorInfo.SetQuantizationScale(scale);
3432     outputTensorInfo.SetQuantizationOffset(offset);
3433     inputTensorInfo1.SetQuantizationScale(scale);
3434     inputTensorInfo1.SetQuantizationOffset(offset);
3435     inputTensorInfo2.SetQuantizationScale(scale);
3436     inputTensorInfo2.SetQuantizationOffset(offset);
3437
3438     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
3439
3440     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
3441         {
3442             1, 2, 3,
3443             4, 5, 6,
3444             7, 8, 9,
3445             10, 11, 12,
3446             13, 14, 15,
3447             16, 17, 18,
3448
3449             19, 20, 21,
3450             22, 23, 24,
3451             25, 26, 27,
3452             28, 29, 30,
3453             31, 32, 33,
3454             34, 35, 36,
3455
3456             37, 38, 39,
3457             40, 41, 42,
3458             43, 44, 45,
3459             46, 47, 48,
3460             49, 50, 51,
3461             52, 53, 54,
3462         })
3463     );
3464
3465     auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
3466     {
3467         1, 2, 3,
3468         4, 5, 6,
3469         7, 8, 9,
3470         10, 11, 12,
3471         13, 14, 15,
3472         16, 17, 18,
3473
3474         19, 20, 21,
3475         22, 23, 24,
3476         25, 26, 27,
3477         28, 29, 30,
3478         31, 32, 33,
3479         34, 35, 36,
3480     })
3481     );
3482
3483     auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
3484     {
3485         37, 38, 39,
3486         40, 41, 42,
3487         43, 44, 45,
3488         46, 47, 48,
3489         49, 50, 51,
3490         52, 53, 54,
3491     })
3492     );
3493
3494     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
3495     armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);
3496
3497     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
3498     armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
3499
3500
3501     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3502
3503     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
3504
3505     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
3506         subTensorsSupported ?
3507             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
3508             workloadFactory.CreateTensorHandle(inputTensorInfo1);
3509
3510     std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
3511         subTensorsSupported ?
3512             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
3513             workloadFactory.CreateTensorHandle(inputTensorInfo2);
3514
3515
3516     armnn::MergerQueueDescriptor data;
3517     armnn::WorkloadInfo info;
3518     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3519     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3520     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3521
3522     data.m_ViewOrigins.push_back(window1);
3523     data.m_ViewOrigins.push_back(window2);
3524
3525     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);
3526
3527     inputHandle1->Allocate();
3528     inputHandle2->Allocate();
3529     outputHandle->Allocate();
3530
3531     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
3532     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
3533
3534     workloadFactory.Finalize();
3535     workload->Execute();
3536
3537     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
3538
3539     return ret;
3540 }
3541
3542 LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory)
3543 {
3544     unsigned int batchSize = 1;
3545     unsigned int channels = 2;
3546     unsigned int height = 2;
3547     unsigned int width = 3;
3548
3549     const float scale = 7.0f;
3550     const int32_t offset = 3;
3551
3552     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
3553     armnn::TensorInfo outputTensorInfo;
3554
3555     const unsigned int shape[] = { batchSize, channels, height, width };
3556     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3557     inputTensorInfo1.SetQuantizationScale(scale);
3558     inputTensorInfo1.SetQuantizationOffset(offset);
3559
3560     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3561     inputTensorInfo2.SetQuantizationScale(scale);
3562     inputTensorInfo2.SetQuantizationOffset(offset);
3563
3564     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3565     outputTensorInfo.SetQuantizationScale(scale);
3566     outputTensorInfo.SetQuantizationOffset(offset);
3567
3568     // See dequantized values to the right.
3569     auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
3570     {
3571          63,  35,  77,  70,  56, 112, //  420, 224,  518,  469,  371, 763
3572         203,  28, 252, 168, 245,  91  // 1400, 175, 1743, 1155, 1694, 616
3573     }));
3574
3575     // See dequantized values to the right.
3576     auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
3577     {
3578          21,   7, 175, 231, 175, 210, // 126,   28, 1204, 1596, 1204, 1449
3579         126, 161,  63,  21, 105, 126  // 861, 1106,  420,  126,  714,  861
3580     }));
3581
3582     // See dequantized values to the right.
3583     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3584     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
3585     {
3586          81,  39, 249, 255, 228, 255, //  546,  252, 1722, 2065(clamped), 1575, 2212(clamped)
3587         255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477
3588     }));
3589
3590     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3591     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
3592     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3593
3594     armnn::AdditionQueueDescriptor data;
3595     armnn::WorkloadInfo info;
3596     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3597     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3598     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3599
3600     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
3601
3602     inputHandle1->Allocate();
3603     inputHandle2->Allocate();
3604     outputHandle->Allocate();
3605
3606     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3607     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
3608
3609     workloadFactory.Finalize();
3610     workload->Execute();
3611
3612     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3613
3614     return result;
3615 }
3616
3617 namespace
3618 {
3619 LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory,
3620                                                           const unsigned int shape0[4],
3621                                                           const std::vector<uint8_t> & values0,
3622                                                           float scale0,
3623                                                           int32_t offset0,
3624                                                           const unsigned int shape1[4],
3625                                                           const std::vector<uint8_t> & values1,
3626                                                           float scale1,
3627                                                           int32_t offset1,
3628                                                           const unsigned int outShape[4],
3629                                                           const std::vector<uint8_t> & outValues,
3630                                                           float outScale,
3631                                                           int32_t outOffset)
3632 {
3633     armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8);
3634     armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8);
3635     armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8);
3636
3637     inputTensorInfo0.SetQuantizationScale(scale0);
3638     inputTensorInfo0.SetQuantizationOffset(offset0);
3639
3640     inputTensorInfo1.SetQuantizationScale(scale1);
3641     inputTensorInfo1.SetQuantizationOffset(offset1);
3642
3643     outputTensorInfo.SetQuantizationScale(outScale);
3644     outputTensorInfo.SetQuantizationOffset(outOffset);
3645
3646     auto input0 = MakeTensor<uint8_t, 4>(inputTensorInfo0, values0);
3647     auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, values1);
3648
3649     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3650     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, outValues);
3651
3652     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
3653     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3654     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3655
3656     armnn::MultiplicationQueueDescriptor data;
3657     armnn::WorkloadInfo info;
3658     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
3659     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
3660     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3661
3662     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
3663
3664     inputHandle0->Allocate();
3665     inputHandle1->Allocate();
3666     outputHandle->Allocate();
3667
3668     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
3669     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3670
3671     workloadFactory.Finalize();
3672     workload->Execute();
3673
3674     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3675
3676     return result;
3677 }
3678 } // anonymous namespace
3679
3680 LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
3681 {
3682     unsigned int batchSize = 1;
3683     unsigned int channels = 2;
3684     unsigned int height = 2;
3685     unsigned int width = 3;
3686     const unsigned int shape[] = { batchSize, channels, height, width };
3687
3688     // See dequantized values to the right.
3689     std::vector<uint8_t> input0({
3690          62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
3691         188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
3692     });
3693
3694     // See dequantized values to the right.
3695     std::vector<uint8_t> input1({
3696         126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
3697          48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
3698     });
3699
3700     // See dequantized values to the right.
3701     std::vector<uint8_t> output(
3702     {
3703          64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
3704          77,  15,  92,  16,  10,  21, // 112200,  26676,        132192,           29160, 21120,  35640
3705     });
3706
3707     return MultiplicationUint8TestHelper(workloadFactory,
3708                                          shape,
3709                                          input0,
3710                                          4.0f,
3711                                          1,
3712                                          shape,
3713                                          input1,
3714                                          3.0f,
3715                                          -2,
3716                                          shape,
3717                                          output,
3718                                          1366.255f, // Scale/offset chosen to have output values out of range.
3719                                          -5);
3720 }
3721
3722 LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
3723 {
3724     const unsigned int shape0[] = { 1, 2, 2, 3 };
3725     const unsigned int shape1[] = { 1, 1, 1, 1 };
3726
3727     std::vector<uint8_t> input0({
3728         1, 2, 3,    4,  5,  6,
3729         7, 8, 9,   10, 11, 12
3730     });
3731
3732     std::vector<uint8_t> input1({2});
3733
3734     std::vector<uint8_t> output({
3735         2,  4,   6,     8, 10, 12,
3736         14, 16, 18,    20, 22, 24
3737     });
3738
3739     return MultiplicationUint8TestHelper(workloadFactory,
3740                                          shape0,
3741                                          input0,
3742                                          1.0f,
3743                                          0,
3744                                          shape1,
3745                                          input1,
3746                                          1.0f,
3747                                          0,
3748                                          shape0,
3749                                          output,
3750                                          1.0f,
3751                                          0);
3752 }
3753
3754 LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory)
3755 {
3756     const unsigned int shape0[] = { 1, 2, 2, 3 };
3757     const unsigned int shape1[] = { 1, 1, 1, 3 };
3758
3759     std::vector<uint8_t> input0({
3760         1, 2, 3,    4,  5,  6,
3761         7, 8, 9,   10, 11, 12
3762     });
3763
3764     std::vector<uint8_t> input1({1, 2, 3});
3765
3766     std::vector<uint8_t> output({
3767         1,  4,   9,     4, 10, 18,
3768         7, 16,  27,    10, 22, 36
3769     });
3770
3771     return MultiplicationUint8TestHelper(workloadFactory,
3772                                          shape0,
3773                                          input0,
3774                                          1.0f,
3775                                          0,
3776                                          shape1,
3777                                          input1,
3778                                          1.0f,
3779                                          0,
3780                                          shape0,
3781                                          output,
3782                                          1.0f,
3783                                          0);
3784 }
3785
3786 LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory)
3787 {
3788     constexpr unsigned int inputWidth = 4;
3789     constexpr unsigned int inputHeight = 4;
3790     constexpr unsigned int inputChannels = 1;
3791     constexpr unsigned int inputBatchSize = 1;
3792
3793     constexpr unsigned int outputWidth = inputWidth;
3794     constexpr unsigned int outputHeight = inputHeight;
3795     constexpr unsigned int outputChannels = inputChannels;
3796     constexpr unsigned int outputBatchSize = inputBatchSize;
3797
3798     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3799         armnn::DataType::QuantisedAsymm8);
3800     inputTensorInfo.SetQuantizationScale(1.5f);
3801     inputTensorInfo.SetQuantizationOffset(-3);
3802
3803     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3804         armnn::DataType::QuantisedAsymm8);
3805     outputTensorInfo.SetQuantizationScale(1.5f);
3806     outputTensorInfo.SetQuantizationOffset(-3);
3807
3808     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3809         1, 2, 3, 4,
3810         2, 3, 4, 5,
3811         3, 4, 5, 6,
3812         4, 5, 6, 7
3813     }));
3814
3815     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3816     result.outputExpected = input;
3817
3818     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3819     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3820
3821     armnn::ResizeBilinearQueueDescriptor descriptor;
3822     armnn::WorkloadInfo info;
3823     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3824     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3825
3826     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3827
3828     inputHandle->Allocate();
3829     outputHandle->Allocate();
3830     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3831
3832     workloadFactory.Finalize();
3833     workload->Execute();
3834
3835     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3836     return result;
3837 }
3838
3839 LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory)
3840 {
3841     constexpr unsigned int inputWidth = 2;
3842     constexpr unsigned int inputHeight = 2;
3843     constexpr unsigned int inputChannels = 1;
3844     constexpr unsigned int inputBatchSize = 1;
3845
3846     constexpr unsigned int outputWidth = inputWidth / 2;
3847     constexpr unsigned int outputHeight = inputHeight / 2;
3848     constexpr unsigned int outputChannels = inputChannels;
3849     constexpr unsigned int outputBatchSize = inputBatchSize;
3850
3851     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3852         armnn::DataType::QuantisedAsymm8);
3853     inputTensorInfo.SetQuantizationScale(0.1567f);
3854     inputTensorInfo.SetQuantizationOffset(1);
3855
3856     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3857         armnn::DataType::QuantisedAsymm8);
3858     outputTensorInfo.SetQuantizationScale(0.1567f);
3859     outputTensorInfo.SetQuantizationOffset(1);
3860
3861     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3862         1, 255,
3863         200, 250
3864     }));
3865
3866     // The 'resize bilinear' operation projects the top-left corner of output texels into the input image,
3867     // then figures out the interpolants and weights. Note this is different to projecting the centre of the
3868     // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value
3869     // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting
3870     // the centre).
3871     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3872     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3873         1
3874     }));
3875
3876     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3877     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3878
3879     armnn::ResizeBilinearQueueDescriptor descriptor;
3880     armnn::WorkloadInfo info;
3881     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3882     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3883
3884     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3885
3886     inputHandle->Allocate();
3887     outputHandle->Allocate();
3888     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3889
3890     workloadFactory.Finalize();
3891     workload->Execute();
3892
3893     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3894     return result;
3895 }
3896
3897 LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory)
3898 {
3899     constexpr unsigned int inputWidth = 4;
3900     constexpr unsigned int inputHeight = 4;
3901     constexpr unsigned int inputChannels = 1;
3902     constexpr unsigned int inputBatchSize = 1;
3903
3904     constexpr unsigned int outputWidth = inputWidth / 2;
3905     constexpr unsigned int outputHeight = inputHeight / 2;
3906     constexpr unsigned int outputChannels = inputChannels;
3907     constexpr unsigned int outputBatchSize = inputBatchSize;
3908
3909     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3910         armnn::DataType::QuantisedAsymm8);
3911     inputTensorInfo.SetQuantizationScale(3.141592f);
3912     inputTensorInfo.SetQuantizationOffset(3);
3913
3914     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3915         armnn::DataType::QuantisedAsymm8);
3916     outputTensorInfo.SetQuantizationScale(3.141592f);
3917     outputTensorInfo.SetQuantizationOffset(3);
3918
3919     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3920         1, 2, 3, 4,
3921         2, 3, 4, 5,
3922         3, 4, 5, 6,
3923         4, 5, 6, 7
3924     }));
3925
3926     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3927     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3928         1, 3,
3929         3, 5
3930     }));
3931
3932     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3933     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3934
3935     armnn::ResizeBilinearQueueDescriptor descriptor;
3936     armnn::WorkloadInfo info;
3937     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3938     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3939
3940     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3941
3942     inputHandle->Allocate();
3943     outputHandle->Allocate();
3944     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3945
3946     workloadFactory.Finalize();
3947     workload->Execute();
3948
3949     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3950     return result;
3951 }
3952
3953 LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory)
3954 {
3955     constexpr unsigned int inputWidth = 3;
3956     constexpr unsigned int inputHeight = 2;
3957     constexpr unsigned int inputChannels = 1;
3958     constexpr unsigned int inputBatchSize = 1;
3959
3960     constexpr unsigned int outputWidth = 2;
3961     constexpr unsigned int outputHeight = 1;
3962     constexpr unsigned int outputChannels = inputChannels;
3963     constexpr unsigned int outputBatchSize = inputBatchSize;
3964
3965     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3966         armnn::DataType::QuantisedAsymm8);
3967     inputTensorInfo.SetQuantizationScale(1.5f);
3968     inputTensorInfo.SetQuantizationOffset(-1);
3969
3970     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3971         armnn::DataType::QuantisedAsymm8);
3972     outputTensorInfo.SetQuantizationScale(1.5f);
3973     outputTensorInfo.SetQuantizationOffset(-1);
3974
3975     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3976         1,  2,  3, // 3.0, 4.5, 6.0
3977         5,  8, 13  // 9.0, 13.5, 21.0
3978     }));
3979
3980     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3981     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3982         1, 3 // 3.0, 5.25
3983     }));
3984
3985     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3986     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3987
3988     armnn::ResizeBilinearQueueDescriptor descriptor;
3989     armnn::WorkloadInfo info;
3990     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3991     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3992
3993     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3994
3995     inputHandle->Allocate();
3996     outputHandle->Allocate();
3997
3998     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3999
4000     workloadFactory.Finalize();
4001     workload->Execute();
4002
4003     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
4004     return result;
4005 }
4006
4007 LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory)
4008 {
4009     constexpr unsigned int inputWidth = 2;
4010     constexpr unsigned int inputHeight = 3;
4011     constexpr unsigned int inputChannels = 1;
4012     constexpr unsigned int inputBatchSize = 1;
4013
4014     constexpr unsigned int outputWidth = 5;
4015     constexpr unsigned int outputHeight = 3;
4016     constexpr unsigned int outputChannels = inputChannels;
4017     constexpr unsigned int outputBatchSize = inputBatchSize;
4018
4019     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
4020         armnn::DataType::QuantisedAsymm8);
4021     inputTensorInfo.SetQuantizationScale(0.010765f);
4022     inputTensorInfo.SetQuantizationOffset(7);
4023
4024     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
4025         armnn::DataType::QuantisedAsymm8);
4026     outputTensorInfo.SetQuantizationScale(0.010132f);
4027     outputTensorInfo.SetQuantizationOffset(-18);
4028
4029     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
4030          24, 228, // 0.183005, 2.379065,
4031         105, 128, // 1.05497, 1.302565
4032         230,  71  // 2.400595, 0.68896
4033     }));
4034
4035     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
4036     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
4037           0,  87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504
4038          86,  96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498
4039         219, 151,  84,  50,  50  // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002
4040     }));
4041
4042     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
4043     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
4044
4045     armnn::ResizeBilinearQueueDescriptor descriptor;
4046     armnn::WorkloadInfo info;
4047     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
4048     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
4049
4050     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
4051
4052     inputHandle->Allocate();
4053     outputHandle->Allocate();
4054     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
4055
4056     workloadFactory.Finalize();
4057     workload->Execute();
4058
4059     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
4060     return result;
4061 }
4062
4063 LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory)
4064 {
4065     auto ret = BatchNormTestImpl<float>(workloadFactory, 0.f, 0);
4066     return ret;
4067 }
4068
4069 LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory)
4070 {
4071     auto ret = BatchNormTestImpl<uint8_t>(workloadFactory, 1.f/20.f, 50);
4072     return ret;
4073 }
4074
4075 LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory)
4076 {
4077     return ConstantTestImpl<uint8_t>(workloadFactory, 2e-6f, 1);
4078 }
4079
4080 LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4081 {
4082     return Concatenation1dTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4083 }
4084
4085 LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4086 {
4087     return Concatenation2dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4088 }
4089
4090 LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4091 {
4092     return Concatenation2dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4093 }
4094
4095 LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
4096 {
4097     return Concatenation2dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4098 }
4099
4100 LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
4101 {
4102     return Concatenation2dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4103 }
4104
4105 LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4106 {
4107     return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4108 }
4109
4110 LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4111 {
4112     return Concatenation3dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4113 }
4114
4115 LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4116 {
4117     return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4118 }
4119
4120 LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
4121 {
4122     return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4123 }
4124
4125 LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
4126 {
4127     return Concatenation3dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4128 }
4129
4130 LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
4131 {
4132     return Concatenation3dDim2DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
4133 }
4134
4135 LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
4136                                                                  bool forceNoPadding)
4137 {
4138     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
4139 }
4140
4141 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory,
4142                                                                         bool forceNoPadding)
4143 {
4144     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<uint8_t>(workloadFactory, forceNoPadding, 3.0f, -5);
4145 }
4146
4147 LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory,
4148                                                                  bool forceNoPadding)
4149 {
4150     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<float>(workloadFactory, forceNoPadding);
4151 }
4152
4153 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory,
4154                                                                         bool forceNoPadding)
4155 {
4156     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<uint8_t>(workloadFactory, forceNoPadding, 0.1f, 128);
4157 }
4158
4159 LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4160 {
4161     return SimpleAveragePooling2dTestCommon<float>(workloadFactory);
4162 }
4163
4164 LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4165 {
4166     return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
4167 }
4168
4169 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
4170                                                                             bool forceNoPadding)
4171 {
4172     return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
4173 }
4174
4175 LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4176 {
4177     return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory);
4178 }
4179
4180 LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4181 {
4182     return LargeTensorsAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
4183 }
4184
4185 LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4186 {
4187     return SimpleL2Pooling2dTestCommon<float>(workloadFactory);
4188 }
4189
4190 LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4191 {
4192     return SimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory);
4193 }
4194
4195 LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory)
4196 {
4197     return L2Pooling2dSize3Stride1TestCommon<float>(workloadFactory);
4198 }
4199
4200 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4201 {
4202     return L2Pooling2dSize3Stride1TestCommon<uint8_t>(workloadFactory);
4203 }
4204
4205 LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory)
4206 {
4207     return L2Pooling2dSize3Stride3TestCommon<float>(workloadFactory);
4208 }
4209
4210 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4211 {
4212     return L2Pooling2dSize3Stride3TestCommon<uint8_t>(workloadFactory);
4213 }
4214
4215 LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory)
4216 {
4217     return L2Pooling2dSize3Stride4TestCommon<float>(workloadFactory);
4218 }
4219
4220 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4221 {
4222     return L2Pooling2dSize3Stride4TestCommon<uint8_t>(workloadFactory);
4223 }
4224
4225 LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory)
4226 {
4227     return L2Pooling2dSize7TestCommon<float>(workloadFactory);
4228 }
4229
4230 LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4231 {
4232     return L2Pooling2dSize7TestCommon<uint8_t>(workloadFactory);
4233 }
4234
4235 LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory)
4236 {
4237     return L2Pooling2dSize9TestCommon<float>(workloadFactory);
4238 }
4239
4240 LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4241 {
4242     return L2Pooling2dSize9TestCommon<uint8_t>(workloadFactory);
4243 }
4244
4245 LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4246 {
4247     return AsymmetricNonSquarePooling2dTestCommon<float>(workloadFactory);
4248 }
4249
4250 LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4251 {
4252     return AsymmetricNonSquarePooling2dTestCommon<uint8_t>(workloadFactory);
4253 }
4254
4255 LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory,
4256                                                armnn::IWorkloadFactory& refWorkloadFactory,
4257                                                armnn::PoolingAlgorithm  poolingType)
4258 {
4259     return ComparePooling2dTestCommon<float>(workloadFactory, refWorkloadFactory, poolingType);
4260 }
4261
4262 LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
4263                                                       armnn::IWorkloadFactory& refWorkloadFactory,
4264                                                       armnn::PoolingAlgorithm  poolingType)
4265 {
4266     return ComparePooling2dTestCommon<uint8_t>(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128);
4267 }
4268
4269 LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory,
4270                                                   bool transposeWeights)
4271 {
4272     return FullyConnectedLargeTestCommon<float>(workloadFactory, transposeWeights);
4273 }
4274
4275 LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4276 {
4277     return IgnorePaddingSimpleMaxPooling2dTestCommon<float>(workloadFactory);
4278 }
4279
4280 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4281 {
4282     return IgnorePaddingSimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, 1.0f, -5);
4283 }
4284
4285 LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
4286 {
4287     return IgnorePaddingMaxPooling2dSize3TestCommon<float>(workloadFactory);
4288 }
4289
4290 LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4291 {
4292     return IgnorePaddingMaxPooling2dSize3TestCommon<uint8_t>(workloadFactory, 1.0f, -5);
4293 }
4294
4295 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4296 {
4297     return IgnorePaddingSimpleAveragePooling2dTestCommon<float>(workloadFactory);
4298 }
4299
4300 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4301 {
4302     return IgnorePaddingSimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory);
4303 }
4304
4305 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory)
4306 {
4307     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<float>(workloadFactory);
4308 }
4309
4310 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
4311     armnn::IWorkloadFactory& workloadFactory)
4312 {
4313     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<uint8_t>(workloadFactory);
4314 }
4315
4316 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
4317 {
4318     return IgnorePaddingAveragePooling2dSize3TestCommon<float>(workloadFactory);
4319 }
4320
4321 LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4322 {
4323     return IgnorePaddingAveragePooling2dSize3TestCommon<uint8_t>(workloadFactory);
4324 }
4325
4326 LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory)
4327 {
4328     return IgnorePaddingSimpleL2Pooling2dTestCommon<float>(workloadFactory);
4329 }
4330
4331 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
4332 {
4333     return IgnorePaddingSimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory);
4334 }
4335
4336 LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
4337 {
4338     return IgnorePaddingL2Pooling2dSize3TestCommon<float>(workloadFactory);
4339 }
4340
4341 LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
4342 {
4343     return IgnorePaddingL2Pooling2dSize3TestCommon<uint8_t>(workloadFactory);
4344 }
4345
4346 LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory)
4347 {
4348     return SimplePermuteFloat32TestCommon(workloadFactory);
4349 };
4350
4351 LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory)
4352 {
4353     return SimplePermuteUint8TestCommon(workloadFactory);
4354 };
4355
4356 LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory)
4357 {
4358     return PermuteFloat32ValueSet1TestCommon(workloadFactory);
4359 };
4360
4361 LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory)
4362 {
4363     return PermuteFloat32ValueSet2TestCommon(workloadFactory);
4364 };
4365
4366 LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory)
4367 {
4368     return PermuteFloat32ValueSet3TestCommon(workloadFactory);
4369 };