//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <string>
#include <armnn/ArmNN.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/TypesUtils.hpp>

#include <test/TensorHelpers.hpp>
#include "QuantizeHelper.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include "Permute.hpp"
#include <boost/numeric/conversion/cast.hpp>

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};
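
// For example, the following (purely illustrative) assertion would hold:
//
//     static_assert(std::is_same<FullyConnectedBiasTypeForInputType<uint8_t>::Type, int32_t>::value,
//                   "quantized uint8_t inputs pair with int32_t biases");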

// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
    const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                BOOST_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}
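
// Worked example (illustrative only, float path): applying a single bias value
// to a 2x2 one-channel image. For float, SelectiveDequantize/SelectiveQuantize
// pass values through unchanged, so each pixel is simply incremented:
//
//     std::vector<float> image = { 1.f, 2.f, 3.f, 4.f };
//     std::vector<float> bias  = { 1.f };
//     ApplyBias(image, 0.f, 0, bias, 0.f, 0, 2u, 2u);
//     // image is now { 2.f, 3.f, 4.f, 5.f }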

template<typename T, typename B>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                  const boost::multi_array<T, 4>& originalInput,
                                                  const boost::multi_array<T, 4>& originalKernel,
                                                  const boost::multi_array<B, 1>& bias,
                                                  const boost::multi_array<T, 4>& originalOutputExpected,
                                                  float qScale,
                                                  int32_t qOffset,
                                                  const armnn::DataLayoutIndexed& layout = armnn::DataLayout::NCHW,
                                                  uint32_t padLeft = 0,
                                                  uint32_t padTop = 0,
                                                  uint32_t padRight = 0,
                                                  uint32_t padBottom = 0)
{
    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);

    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);

    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    BOOST_ASSERT(inputNum == 1);
    BOOST_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo = GetTensorInfo<T>(2*inputNum, inputChannels, inputHeight, inputWidth, layout);
    armnn::TensorInfo outputTensorInfo = GetTensorInfo<T>(
            2*outputNum, outputChannels, outputHeight, outputWidth, layout);
    armnn::TensorInfo kernelDesc = GetTensorInfo<T>(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }
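
    // The qScale*qScale bias scale above follows the usual quantization
    // convention: bias values are stored with scale inputScale * weightScale
    // (and zero offset), and input and kernel both use qScale here.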

    LayerTestResult<T, 4> ret(outputTensorInfo);

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // If required, permute the input data to match the requested NHWC layout.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
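    // With armnn::PermutationVector semantics, the value at index i gives the
    // destination dimension of source dimension i: { 0, 3, 1, 2 } sends N->0,
    // C->3, H->1, W->2, so element (n, c, h, w) of an NCHW tensor lands at
    // (n, h, w, c) in the NHWC result.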
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
        inputData = tmp;
    }

    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
            originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
            outputWidth, outputHeight);
    }

    // Construct expected output data - two identical images.
    std::vector<T> outputData;
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());

    // If required, permute the expected output to NHWC as well.
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
        outputData = tmp;
    }
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    // TODO: support non-trivial padding and strides.
    uint32_t strideX = 1;
    uint32_t strideY = 1;

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
    // Permute the kernel if necessary.
    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
    }
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    if(biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout.GetDataLayout();

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}
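
// Illustrative sketch (not part of this file's API) of how a backend test might
// drive SimpleConvolution2dTestImpl; the tensor infos and value vectors are
// placeholders:
//
//     auto input    = MakeTensor<float, 4>(inputInfo, inputValues);      // [1, C, H, W]
//     auto kernel   = MakeTensor<float, 4>(kernelInfo, kernelValues);    // [outC, C, kH, kW]
//     auto bias     = MakeTensor<float, 1>(biasInfo, biasValues);        // [outC], or empty to disable
//     auto expected = MakeTensor<float, 4>(outputInfo, expectedValues);  // [1, outC, outH, outW]
//     LayerTestResult<float, 4> result = SimpleConvolution2dTestImpl<float, float>(
//         workloadFactory, input, kernel, bias, expected, 0.0f, 0, armnn::DataLayout::NCHW);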

template<typename T, typename B>
LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                      const boost::multi_array<T, 4>& input,
                                                      const boost::multi_array<T, 4>& kernel,
                                                      const boost::multi_array<B, 1>& bias,
                                                      const boost::multi_array<T, 4>& outputExpected,
                                                      armnn::DataLayout dataLayout,
                                                      float qScale,
                                                      int32_t qOffset,
                                                      uint32_t padLeft = 1,
                                                      uint32_t padTop = 1,
                                                      uint32_t padRight = 1,
                                                      uint32_t padBottom = 1,
                                                      uint32_t strideX  = 1,
                                                      uint32_t strideY  = 1)
{
    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[3]);
    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[1]);
    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[2]);

    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(kernel.shape()[2]);

    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, armnn::GetDataType<T>());
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
                                       armnn::GetDataType<T>());
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, armnn::GetDataType<T>());
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    // Construct the output data, with bias applied, as appropriate.
    std::vector<T> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    LayerTestResult<T, 4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
    if (biasEnabled)
    {
        // Populate the bias tensor when a bias is actually used; otherwise the
        // workload would read an unallocated tensor with m_BiasEnabled set.
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    armnn::Convolution2dQueueDescriptor data;

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = dataLayout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<typename T, typename B>
LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                               const boost::multi_array<T, 4>& input,
                                                               const boost::multi_array<T, 4>& originalKernel,
                                                               const boost::multi_array<B, 1>& bias,
                                                               const boost::multi_array<T, 4>& outputExpected,
                                                               float qScale,
                                                               int32_t qOffset,
                                                               const armnn::DataLayoutIndexed& layout,
                                                               uint32_t padLeft = 0,
                                                               uint32_t padTop = 0,
                                                               uint32_t padRight = 0,
                                                               uint32_t padBottom = 0,
                                                               uint32_t strideX = 1,
                                                               uint32_t strideY = 1)
{
    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[1]);
    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[2]);
    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[3]);
    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);

    // If a bias is used, its size must equal the number of output channels.
    bool biasEnabled = bias.size() > 0;
    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
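    // Depthwise weights here are laid out as [M, C, kH, kW] (in NCHW terms),
    // where M is the depth multiplier; the layer produces C * M output
    // channels, e.g. a [2, 3, kH, kW] kernel over a 3-channel input yields 6.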

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo = GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
    armnn::TensorInfo outputTensorInfo = GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
    armnn::TensorInfo kernelDesc = GetTensorInfo<T>(kernelChanMul, kernelChannels, kernelHeight, kernelWidth, layout);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());

    // Set quantization parameters if the requested type is a quantized type.
    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);

    // If required, permute the input data to match the requested NHWC layout.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
        inputData = tmp;
    }

    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    // Construct the output data, with bias applied, as appropriate.
    std::vector<T> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
    if (biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
            outputWidth, outputHeight);
    }

    LayerTestResult<T, 4> ret(outputTensorInfo);

    // If required, permute the expected output to NHWC as well.
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
        outputData = tmp;
    }

    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);

    // Permute the kernel if necessary.
    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
    }

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
    if (biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout.GetDataLayout();

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<typename T, typename B>
LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                              float qScale,
                                                              int32_t qOffset,
                                                              bool biasEnabled,
                                                              const armnn::DataLayoutIndexed& layout)
{
    unsigned int inputHeight = 3;
    unsigned int inputWidth = 3;
    unsigned int inputChannels = 2;
    unsigned int inputNum = 1;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth = 3;
    unsigned int kernelChannels = inputChannels;

    unsigned int outputHeight = 1;
    unsigned int outputWidth = 1;
    unsigned int outputChannels = kernelChannels;
    unsigned int outputNum = inputNum;

    armnn::TensorInfo inputTensorInfo = GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
    armnn::TensorInfo outputTensorInfo = GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
    armnn::TensorInfo kernelDesc = GetTensorInfo<T>(1, outputChannels, kernelHeight, kernelWidth, layout);
    armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>());

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = std::vector<T>(
            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
                    1.f, 2.f, 1.f,
                    2.f, 1.f, 2.f,
                    1.f, 2.f, 1.f,

                    1.f, 2.f, 1.f,
                    2.f, 1.f, 2.f,
                    1.f, 2.f, 1.f,
            }));
    // If required, permute the input data to match the requested NHWC layout.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
        inputData = tmp;
    }
    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);

    std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                                            {0, 2}));
    auto bias = MakeTensor<B, 1>(biasDesc, biasV);

    std::vector<T> kernelData = std::vector<T>(
            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
                    1.f, 0.f,  1.f,
                    0.f, 0.f,  0.f,
                    -1.f, 0.f, -1.f,

                    1.f, 0.f,  1.f,
                    0.f, 0.f,  0.f,
                    -1.f, 0.f, -1.f,
            }));
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(kernelData.size());
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, kernelData.data(), tmp.data());
        kernelData = tmp;
    }
    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);

    // Manually calculated.
    std::vector<T> outputImage(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
                           outputTensorInfo.GetQuantizationOffset(),
                           {0.f, 0.f})
    );
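
    // Why zero: each kernel slice has +1 at its top corners and -1 at its
    // bottom corners, and the corresponding input corners are all 1, so the
    // single valid 3x3 position evaluates to (1 + 1) - (1 + 1) = 0 per channel.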

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    LayerTestResult<T, 4> ret(outputTensorInfo);
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputImage.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data());
        outputImage = tmp;
    }

    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = 1;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = 0;
    data.m_Parameters.m_PadBottom = 0;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout.GetDataLayout();

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<typename T, typename B>
LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                     float qScale,
                                                     int32_t qOffset,
                                                     bool biasEnabled,
                                                     const armnn::DataLayoutIndexed& layout)
{
    unsigned int depthMultiplier = 2;

    unsigned int inputHeight    = 8;
    unsigned int inputWidth     = 16;
    unsigned int inputChannels  = 2;
    unsigned int inputBatchSize = 1;

    unsigned int kernelHeight = 5;
    unsigned int kernelWidth  = 3;

    unsigned int outputHeight    = inputHeight - kernelHeight + 1 + 2;
    unsigned int outputWidth     = (inputWidth - kernelWidth + 1)/2;
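    // These follow from the strides and padding set further down:
    // outputHeight = 8 - 5 + 1 + (padTop + padBottom) = 6 with strideY = 1,
    // outputWidth  = (16 - 3 + 1) / strideX = 7 with strideX = 2 and no
    // horizontal padding.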
    unsigned int outputChannels  = inputChannels * depthMultiplier;
    unsigned int outputBatchSize = inputBatchSize;

    armnn::TensorInfo inputTensorInfo = GetTensorInfo<T>(
            inputBatchSize, inputChannels, inputHeight, inputWidth, layout);
    armnn::TensorInfo outputTensorInfo = GetTensorInfo<T>(
            outputBatchSize, outputChannels, outputHeight, outputWidth, layout);
    armnn::TensorInfo kernelDesc = GetTensorInfo<T>(
            depthMultiplier, inputChannels, kernelHeight, kernelWidth, layout);
    armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>());

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // NOTE: originalInputData is in NCHW format
    std::vector<T> originalInputData = std::vector<T>(
            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            }));
    std::vector<T> inputData = originalInputData;
    // If required, permute the input data to match the requested NHWC layout.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, originalInputData.data(), inputData.data());
    }
    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);

    std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
        {0, 2, 1, -1}));
    auto bias = MakeTensor<B, 1>(biasDesc, biasV);

    std::vector<T> originalKernelData = std::vector<T>(
            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
                    1, 1, 1,
                    1, -1, 1,
                    1, 1, 1,
                    1, 1, 1,
                    1, 1, 1,

                    2, 2, 2,
                    2, 2, 2,
                    2, 2, 2,
                    2, 2, 2,
                    2, 2, 2,

                    0, 0, 0,
                    0, -1, 0,
                    0, 0, 0,
                    0, 0, 0,
                    0, 0, 0,

                    0, 0, 0,
                    0, 0, 0,
                    0, 1, 0,
                    0, 0, 0,
                    0, 0, 0
            }));
    std::vector<T> kernelData = originalKernelData;
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernelData.data(), kernelData.data());
    }
    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);

    // Manually calculated.
    std::vector<T> originalOutputImage = std::vector<T>(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
            3.5f,  3.5f,  3.5f,  3.5f,  3.5f,  3.5f,  3.5f,
            6.0f,  6.0f,  6.0f,  6.0f,  6.0f,  6.0f,  6.0f,
            5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,
            6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,
            6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,
            5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,

            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,

            8.0f,  8.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            8.0f,  8.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,

            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f
        }));

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(originalOutputImage,
                  outputTensorInfo.GetQuantizationScale(),
                  outputTensorInfo.GetQuantizationOffset(),
                  biasV,
                  biasDesc.GetQuantizationScale(),
                  biasDesc.GetQuantizationOffset(),
                  outputWidth,
                  outputHeight);
    }

    LayerTestResult<T, 4> ret(outputTensorInfo);
    std::vector<T> outputImage = originalOutputImage;
    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, originalOutputImage.data(), outputImage.data());
    }

    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
    data.m_Parameters.m_StrideX = 2;
    data.m_Parameters.m_StrideY = 1;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = 1;
    data.m_Parameters.m_PadBottom = 1;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout.GetDataLayout();

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<typename T, typename B>
LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                         const boost::multi_array<T, 4>& input,
                                                         const boost::multi_array<T, 4>& kernel,
                                                         const boost::multi_array<B, 1>& bias,
                                                         const boost::multi_array<T, 4>& outputExpected,
                                                         float qScale,
                                                         int32_t qOffset,
                                                         uint32_t padLeft = 0,
                                                         uint32_t padTop = 0,
                                                         uint32_t padRight = 0,
                                                         uint32_t padBottom = 0,
                                                         uint32_t strideX = 1,
                                                         uint32_t strideY = 1)
{
    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[3]);
    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[1]);
    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[2]);

    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(kernel.shape()[2]);

    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, armnn::GetDataType<T>());
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
                                       armnn::GetDataType<T>());
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, armnn::GetDataType<T>());
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());

    // Set quantization parameters if the requested type is a quantized type.
    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    // Construct the output data, with bias applied, as appropriate.
    std::vector<T> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    LayerTestResult<T, 4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
    if (biasEnabled)
    {
        // Populate the bias tensor when a bias is actually used.
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = armnn::DataLayout::NHWC;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<typename T>
LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                           float qScale,
                                           int32_t qOffset,
                                           bool biasEnabled)
{
    using B = typename FullyConnectedBiasTypeForInputType<T>::Type;

    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // This shouldn't be particularly slow, given that convolution is implemented
    // as a matrix multiplication, at which point the dimensionality doesn't matter.

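    // For example, an [N, C, L] 1D input is modelled as an [N, C, L, 1] 2D input,
    // the [O, I, K] kernel becomes [O, I, K, 1], and the "width" parameters are
    // pinned to trivial values (strideX = 1, padLeft = padRight = 0) below.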
    unsigned int batchSize      = 1;
    unsigned int inputChannels  = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize      = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize     = 3;
    unsigned int padSize        = 2;
    unsigned int stride         = 1;
    unsigned int outputSize     = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo biasInfo({outputChannels}, armnn::GetDataType<B>());

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData(
        QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), {
            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        }));

    std::vector<T> kernelData(
        QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        }));

    std::vector<B> biasData(
        QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), {
            1.0f, 0.0f, 0.0f
        }));

    std::vector<T> outputData(
        QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f - 3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        }));

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
            biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
            1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
    armnn::ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Weight         = &weightsTensor;
    data.m_Bias           = &biasTensor;
    data.m_Parameters.m_StrideX        = 1;
    data.m_Parameters.m_StrideY        = stride;
    data.m_Parameters.m_PadLeft        = 0;
    data.m_Parameters.m_PadRight       = 0;
    data.m_Parameters.m_PadTop         = padSize;
    data.m_Parameters.m_PadBottom      = padSize;
    data.m_Parameters.m_BiasEnabled    = biasEnabled;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    workloadFactory.Finalize();
    workload->Execute();

    // Output
    LayerTestResult<T,4> ret(outputInfo);
    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
    return ret;
}

template<typename T>
LayerTestResult<T,4> CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                  armnn::IWorkloadFactory& refWorkloadFactory)
{
    unsigned int inputHeight   = 8;
    unsigned int inputWidth    = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum      = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth  = 3;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX    = 1;
    unsigned int padY    = 1;

    unsigned int outputNum      = inputNum;
    unsigned int outputChannels = 2;
    unsigned int outputHeight   = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth    = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
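    // i.e. (8 + 2 - 3 + 3) / 3 = 3 and (16 + 2 - 3 + 2) / 2 = 8 here.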

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo kernelDesc;
    armnn::TensorInfo biasDesc;

    unsigned int inputShape[]    = {inputNum, inputChannels, inputHeight, inputWidth};
    unsigned int outputShape[]   = {outputNum, outputChannels, outputHeight, outputWidth};
    unsigned int kernelShape[]   = {outputChannels, inputChannels, kernelHeight, kernelWidth};
    unsigned int biasShape[]     = {outputChannels};

    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
    kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>());
    biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>());

    LayerTestResult<T,4> ret(outputTensorInfo);

    auto input  = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
    auto bias   = MakeRandomTensor<T, 1>(biasDesc, 1028);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padX;
    data.m_Parameters.m_PadRight = padX;
    data.m_Parameters.m_PadTop = padY;
    data.m_Parameters.m_PadBottom = padY;
    data.m_Parameters.m_BiasEnabled = true;

    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);

    armnn::Convolution2dQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);

    outputHandleRef->Allocate();
    inputHandleRef->Allocate();

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();
    refWorkloadFactory.Finalize();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}

template<typename T>
LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                            armnn::IWorkloadFactory& refWorkloadFactory,
                                                            const armnn::DataLayoutIndexed& layout)
{
    unsigned int inputHeight = 8;
    unsigned int inputWidth = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth = 3;
    unsigned int channelMultiplier = 1;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX = 1;
    unsigned int padY = 1;

    unsigned int outputNum = inputNum;
    unsigned int outputChannels = inputChannels * channelMultiplier;
    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo kernelDesc;
    armnn::TensorInfo biasDesc;

    std::vector<unsigned int> inputShape;
    std::vector<unsigned int> outputShape;
    std::vector<unsigned int> kernelShape;
    std::vector<unsigned int> biasShape = { outputChannels };
    switch (layout.GetDataLayout())
    {
        case armnn::DataLayout::NCHW:
            inputShape =  { inputNum, inputChannels, inputHeight, inputWidth };
            outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
            kernelShape = { channelMultiplier, inputChannels, kernelHeight, kernelWidth };
            break;
        case armnn::DataLayout::NHWC:
            inputShape =  { inputNum, inputHeight, inputWidth, inputChannels };
            outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
            kernelShape = { channelMultiplier, kernelHeight, kernelWidth, inputChannels };
            break;
        default:
            throw armnn::InvalidArgumentException("unknown data layout ["
                                                  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
    }

    float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0.0f;
    float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0.0f;
    int32_t qOffset = 0;

    inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), armnn::GetDataType<T>(), inputsQScale, qOffset);
    outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), armnn::GetDataType<T>(), outputQScale, qOffset);
    kernelDesc = armnn::TensorInfo(4, kernelShape.data(), armnn::GetDataType<T>(), inputsQScale, qOffset);
    biasDesc = armnn::TensorInfo(
            1, biasShape.data(), armnn::GetBiasDataType(armnn::GetDataType<T>()), inputsQScale, qOffset);

    LayerTestResult<T, 4> ret(outputTensorInfo);

    auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
    auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
            biasDesc, 1028, 0.0f, 255.0f);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padX;
    data.m_Parameters.m_PadRight = padX;
    data.m_Parameters.m_PadTop = padY;
    data.m_Parameters.m_PadBottom = padY;
    data.m_Parameters.m_BiasEnabled = true;
    data.m_Parameters.m_DataLayout = layout.GetDataLayout();

    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);

    armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);

    outputHandleRef->Allocate();
    inputHandleRef->Allocate();

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    workloadFactory.Finalize();
    workload->Execute();
    refWorkloadFactory.Finalize();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}