IVGCVSW-3381 Break up LayerTests.hpp into more manageable files
author     Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
           Wed, 28 Aug 2019 17:08:46 +0000 (18:08 +0100)
committer  mike.kelly <mike.kelly@arm.com>
           Fri, 30 Aug 2019 10:58:54 +0000 (10:58 +0000)
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Icf39434f09fd340ad664cb3b97b8bee6d9da4838
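
The pattern applied throughout this change is to split each monolithic header-only template test into a small declaration header plus a .cpp that holds the template machinery and concrete entry points. A minimal sketch of that split, with hypothetical names (not taken from the new files):

    // layerTests/ExampleTestImpl.hpp -- declarations only
    #pragma once

    #include "LayerTestResult.hpp"

    #include <backendsCommon/IBackendInternal.hpp>
    #include <backendsCommon/WorkloadFactory.hpp>

    LayerTestResult<float, 4> ExampleFloat32Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);

    // layerTests/ExampleTestImpl.cpp -- template implementation + wrappers
    #include "ExampleTestImpl.hpp"

    #include <ResolveType.hpp>

    namespace
    {

    template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
    LayerTestResult<T, 4> ExampleTestImpl(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
    {
        armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, ArmnnType);
        LayerTestResult<T, 4> result(outputTensorInfo);
        // ... create the workload via workloadFactory, execute it, fill result.output ...
        return result;
    }

    } // anonymous namespace

    LayerTestResult<float, 4> ExampleFloat32Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
    {
        return ExampleTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
    }

Keeping the template bodies out of the headers means each layer's test code is compiled once, instead of in every translation unit that includes LayerTests.hpp.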

78 files changed:
src/backends/backendsCommon/common.mk
src/backends/backendsCommon/test/BatchNormTestImpl.hpp [deleted file]
src/backends/backendsCommon/test/CMakeLists.txt
src/backends/backendsCommon/test/ConcatEndToEndTestImpl.hpp [moved from src/backends/backendsCommon/test/ConcatTestImpl.hpp with 100% similarity]
src/backends/backendsCommon/test/Conv2dTestImpl.hpp [deleted file]
src/backends/backendsCommon/test/DataTypeUtils.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/DetectionPostProcessEndToEndTestImpl.hpp [moved from src/backends/backendsCommon/test/DetectionPostProcessTestImpl.hpp with 100% similarity]
src/backends/backendsCommon/test/FullyConnectedTestImpl.hpp [deleted file]
src/backends/backendsCommon/test/GatherTestImpl.hpp [deleted file]
src/backends/backendsCommon/test/LayerTests.cpp [deleted file]
src/backends/backendsCommon/test/LayerTests.hpp
src/backends/backendsCommon/test/SoftmaxTestImpl.hpp [deleted file]
src/backends/backendsCommon/test/layerTests/ActivationTestImpl.cpp [moved from src/backends/backendsCommon/test/ActivationTestImpl.hpp with 99% similarity]
src/backends/backendsCommon/test/layerTests/ActivationTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/BatchNormalizationTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ConcatTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ConcatTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ConstantTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ConstantTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ConvertFp16ToFp32TestImpl.cpp [moved from src/backends/backendsCommon/test/ConvertFp16ToFp32TestImpl.hpp with 90% similarity]
src/backends/backendsCommon/test/layerTests/ConvertFp16ToFp32TestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ConvertFp32ToFp16TestImpl.cpp [moved from src/backends/backendsCommon/test/ConvertFp32ToFp16TestImpl.hpp with 89% similarity]
src/backends/backendsCommon/test/layerTests/ConvertFp32ToFp16TestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/DebugTestImpl.cpp [moved from src/backends/backendsCommon/test/DebugTestImpl.hpp with 71% similarity]
src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.cpp [moved from src/backends/backendsCommon/test/DequantizeTestImpl.hpp with 79% similarity]
src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/DetectionPostProcessTestImpl.hpp [moved from src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp with 99% similarity]
src/backends/backendsCommon/test/layerTests/FakeQuantizationTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/FakeQuantizationTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/FloorTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/FloorTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/GatherTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/GatherTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/L2NormalizationTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/L2NormalizationTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp [moved from src/backends/backendsCommon/test/LstmTestImpl.hpp with 86% similarity]
src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/MeanTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/NormalizationTestImpl.cpp [moved from src/backends/backendsCommon/test/NormTestImpl.hpp with 88% similarity]
src/backends/backendsCommon/test/layerTests/NormalizationTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/PadTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/PadTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/PermuteTestImpl.hpp [moved from src/backends/backendsCommon/test/PermuteTestImpl.hpp with 97% similarity]
src/backends/backendsCommon/test/layerTests/Pooling2dTestImpl.cpp [moved from src/backends/backendsCommon/test/Pooling2dTestImpl.hpp with 71% similarity]
src/backends/backendsCommon/test/layerTests/Pooling2dTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/PreluTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.cpp [moved from src/backends/backendsCommon/test/QuantizeTestImpl.hpp with 80% similarity]
src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ReshapeTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ReshapeTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/ResizeTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/RsqrtTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/RsqrtTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/SoftmaxTestImpl.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/SoftmaxTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/SpaceToBatchNdTestImpl.cpp [moved from src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp with 55% similarity]
src/backends/backendsCommon/test/layerTests/SpaceToBatchNdTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/SpaceToDepthTestImpl.cpp [moved from src/backends/backendsCommon/test/SpaceToDepthTestImpl.hpp with 63% similarity]
src/backends/backendsCommon/test/layerTests/SpaceToDepthTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/SplitterTestImpl.cpp [moved from src/backends/backendsCommon/test/SplitterTestImpl.hpp with 87% similarity]
src/backends/backendsCommon/test/layerTests/SplitterTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/StackTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/StridedSliceTestImpl.cpp [moved from src/backends/backendsCommon/test/StridedSliceTestImpl.hpp with 62% similarity]
src/backends/backendsCommon/test/layerTests/StridedSliceTestImpl.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/layerTests/TransposeConvolution2dTestImpl.hpp [moved from src/backends/backendsCommon/test/TransposeConvolution2dTestImpl.hpp with 99% similarity]
src/backends/cl/test/ClEndToEndTests.cpp
src/backends/cl/test/ClLayerTests.cpp
src/backends/neon/test/NeonEndToEndTests.cpp
src/backends/neon/test/NeonLayerTests.cpp
src/backends/reference/test/RefEndToEndTests.cpp
src/backends/reference/test/RefLayerTests.cpp

diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index b99f18a..88eae35 100644
@@ -32,15 +32,40 @@ COMMON_SOURCES := \
 COMMON_TEST_SOURCES := \
     test/CommonTestUtils.cpp \
     test/JsonPrinterTestImpl.cpp \
-    test/LayerTests.cpp \
     test/TensorCopyUtils.cpp \
+    test/layerTests/ActivationTestImpl.cpp \
     test/layerTests/AdditionTestImpl.cpp \
+    test/layerTests/BatchNormalizationTestImpl.cpp \
+    test/layerTests/ConcatTestImpl.cpp \
+    test/layerTests/ConstantTestImpl.cpp \
+    test/layerTests/Conv2dTestImpl.cpp \
+    test/layerTests/ConvertFp16ToFp32TestImpl.cpp \
+    test/layerTests/ConvertFp32ToFp16TestImpl.cpp \
+    test/layerTests/DebugTestImpl.cpp \
+    test/layerTests/DequantizeTestImpl.cpp \
     test/layerTests/DivisionTestImpl.cpp \
     test/layerTests/EqualTestImpl.cpp \
+    test/layerTests/FakeQuantizationTestImpl.cpp \
+    test/layerTests/FloorTestImpl.cpp \
+    test/layerTests/FullyConnectedTestImpl.cpp \
+    test/layerTests/GatherTestImpl.cpp \
     test/layerTests/GreaterTestImpl.cpp \
+    test/layerTests/L2NormalizationTestImpl.cpp \
+    test/layerTests/LstmTestImpl.cpp \
     test/layerTests/MaximumTestImpl.cpp \
     test/layerTests/MinimumTestImpl.cpp \
     test/layerTests/MultiplicationTestImpl.cpp \
+    test/layerTests/NormalizationTestImpl.cpp \
+    test/layerTests/PadTestImpl.cpp \
+    test/layerTests/Pooling2dTestImpl.cpp \
+    test/layerTests/ReshapeTestImpl.cpp \
+    test/layerTests/RsqrtTestImpl.cpp \
+    test/layerTests/QuantizeTestImpl.cpp \
+    test/layerTests/SoftmaxTestImpl.cpp \
+    test/layerTests/SpaceToBatchNdTestImpl.cpp \
+    test/layerTests/SpaceToDepthTestImpl.cpp \
+    test/layerTests/SplitterTestImpl.cpp \
+    test/layerTests/StridedSliceTestImpl.cpp \
     test/layerTests/SubtractionTestImpl.cpp
 
 ifeq ($(ARMNN_REF_ENABLED),1)
diff --git a/src/backends/backendsCommon/test/BatchNormTestImpl.hpp b/src/backends/backendsCommon/test/BatchNormTestImpl.hpp
deleted file mode 100644
index d34a54f..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <ResolveType.hpp>
-#include "WorkloadTestUtils.hpp"
-
-#include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-#include <backendsCommon/test/QuantizeHelper.hpp>
-
-#include <test/TensorHelpers.hpp>
-
-#include <DataLayoutIndexed.hpp>
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchNormTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::TensorShape& inputOutputTensorShape,
-    const std::vector<float>& inputValues,
-    const std::vector<float>& expectedOutputValues,
-    float qScale,
-    int32_t qOffset,
-    armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, ArmnnType);
-    armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, ArmnnType);
-
-    armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
-
-    armnn::TensorInfo tensorInfo({ inputOutputTensorShape[dataLayoutIndexed.GetChannelsIndex()] },
-                                 ArmnnType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        tensorInfo.SetQuantizationScale(qScale);
-        tensorInfo.SetQuantizationOffset(qOffset);
-    }
-
-    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo,
-                                        QuantizedVector<T>(qScale, qOffset, inputValues));
-
-    // These values are per-channel of the input.
-    auto mean     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
-    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4,  9}));
-    auto beta     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3,  2}));
-    auto gamma    = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2,  1}));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-
-    result.outputExpected = MakeTensor<T, 4>(inputTensorInfo,
-                                             QuantizedVector<T>(qScale, qOffset, expectedOutputValues));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
-
-    armnn::BatchNormalizationQueueDescriptor descriptor;
-    descriptor.m_Mean                    = &meanTensor;
-    descriptor.m_Variance                = &varianceTensor;
-    descriptor.m_Beta                    = &betaTensor;
-    descriptor.m_Gamma                   = &gammaTensor;
-    descriptor.m_Parameters.m_Eps        = 0.0f;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-    armnn::WorkloadInfo info;
-
-    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
-    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
-    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
-    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-
-    return result;
-}
-
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T,4> BatchNormTestNhwcImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    const unsigned int width    = 2;
-    const unsigned int height   = 3;
-    const unsigned int channels = 2;
-    const unsigned int num      = 1;
-
-    armnn::TensorInfo inputTensorInfo({num, height, width, channels}, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({num, height, width, channels}, ArmnnType);
-    armnn::TensorInfo tensorInfo({channels}, ArmnnType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        tensorInfo.SetQuantizationScale(qScale);
-        tensorInfo.SetQuantizationOffset(qOffset);
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo,
-        QuantizedVector<T>(qScale, qOffset,
-        {
-            1.f, 1.f, 4.f, 1.f,
-            4.f, 4.f, 2.f, 1.f,
-            1.f, -2.f, 6.f, 4.f
-        }));
-    // These values are per-channel of the input.
-    auto mean     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
-    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4, 9}));
-    auto beta     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, 2}));
-    auto gamma    = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2, 1}));
-    LayerTestResult<T,4> ret(outputTensorInfo);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::BatchNormalizationQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
-
-    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
-    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
-    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
-    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Mean             = &meanTensor;
-    data.m_Variance         = &varianceTensor;
-    data.m_Beta             = &betaTensor;
-    data.m_Gamma            = &gammaTensor;
-    data.m_Parameters.m_Eps = 0.0f;
-    data.m_Parameters.m_DataLayout = armnn::DataLayout::NHWC;
-
-    // For each channel:
-// subtract mean, divide by standard deviation (with an epsilon to avoid div by 0),
-    // multiply by gamma and add beta
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-        QuantizedVector<T>(qScale, qOffset,
-        {
-            1.f, 3.f, 4.f, 3.f,
-            4.f, 4.f, 2.f, 3.f,
-            1.f, 2.f, 6.f, 4.f
-        }));
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
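
The two templates above move, per the file list, into layerTests/BatchNormalizationTestImpl.{hpp,cpp}. As a sanity check on the hard-coded expectations in BatchNormTestNhwcImpl: per channel the reference computation is y = gamma * (x - mean) / sqrt(variance + eps) + beta, so with eps = 0, channel 0 (mean 3, variance 4, gamma 2, beta 3) reduces to y = x, and channel 1 (mean -2, variance 9, gamma 1, beta 2) maps x = -2 to 2, matching the expected output values. A standalone sketch of that reference (an assumed helper, not part of the test code):

    #include <cassert>
    #include <cmath>

    // Reference batch normalization for a single element; mirrors the comment
    // in BatchNormTestNhwcImpl ("subtract mean, divide by standard deviation,
    // multiply by gamma and add beta").
    float BatchNormRef(float x, float mean, float variance, float gamma, float beta,
                       float eps = 0.0f)
    {
        return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
    }

    int main()
    {
        // Channel 0 of the NHWC test: mean 3, variance 4, gamma 2, beta 3 => y == x.
        assert(BatchNormRef(4.0f, 3.0f, 4.0f, 2.0f, 3.0f) == 4.0f);
        // Channel 1: mean -2, variance 9, gamma 1, beta 2 => x = -2 maps to 2.
        assert(BatchNormRef(-2.0f, -2.0f, 9.0f, 1.0f, 2.0f) == 2.0f);
        return 0;
    }
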
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 7dca047..c0760cb 100644
 
 list(APPEND armnnBackendsCommonUnitTests_sources
     ActivationFixture.hpp
-    ActivationTestImpl.hpp
     BackendIdTests.cpp
     BackendRegistryTests.cpp
-    BatchNormTestImpl.hpp
     CommonTestUtils.cpp
     CommonTestUtils.hpp
-    Conv2dTestImpl.hpp
-    ConvertFp16ToFp32TestImpl.hpp
-    ConvertFp32ToFp16TestImpl.hpp
-    DebugTestImpl.hpp
-    DequantizeTestImpl.hpp
+    DataTypeUtils.hpp
     DequantizeEndToEndTestImpl.hpp
-    DetectionPostProcessLayerTestImpl.hpp
-    DetectionPostProcessTestImpl.hpp
+    DetectionPostProcessEndToEndTestImpl.hpp
     DynamicBackendTests.cpp
     DynamicBackendTests.hpp
     EndToEndTestImpl.hpp
-    FullyConnectedTestImpl.hpp
-    GatherTestImpl.hpp
     GatherEndToEndTestImpl.hpp
     IsLayerSupportedTestImpl.hpp
     JsonPrinterTestImpl.cpp
     JsonPrinterTestImpl.hpp
     LayerReleaseConstantDataTest.cpp
-    LayerTests.cpp
     LayerTests.hpp
-    LstmTestImpl.hpp
-    NormTestImpl.hpp
-    ConcatTestImpl.hpp
+    ConcatEndToEndTestImpl.hpp
     MockBackend.cpp
     MockBackend.hpp
     MockBackendId.hpp
     OptimizeSubgraphViewTests.cpp
     OptimizationViewsTests.cpp
-    PermuteTestImpl.hpp
-    Pooling2dTestImpl.hpp
     PreluEndToEndTestImpl.hpp
     QuantizeHelper.hpp
-    QuantizeTestImpl.hpp
     QuantizedLstmEndToEndTestImpl.hpp
     ResizeEndToEndTestImpl.hpp
     RuntimeTestImpl.hpp
-    SoftmaxTestImpl.hpp
-    SpaceToDepthTestImpl.hpp
     SpaceToDepthEndToEndTestImpl.hpp
     SplitterEndToEndTestImpl.hpp
-    SplitterTestImpl.hpp
-    StridedSliceTestImpl.hpp
     TensorCopyUtils.cpp
     TensorCopyUtils.hpp
     WorkloadFactoryHelper.hpp
     WorkloadTestUtils.hpp
+    layerTests/ActivationTestImpl.cpp
+    layerTests/ActivationTestImpl.hpp
     layerTests/AdditionTestImpl.cpp
     layerTests/AdditionTestImpl.hpp
+    layerTests/BatchNormalizationTestImpl.cpp
+    layerTests/BatchNormalizationTestImpl.hpp
+    layerTests/BatchToSpaceNdTestImpl.hpp
+    layerTests/ConcatTestImpl.cpp
+    layerTests/ConcatTestImpl.hpp
+    layerTests/ConstantTestImpl.cpp
+    layerTests/ConstantTestImpl.hpp
+    layerTests/Conv2dTestImpl.cpp
+    layerTests/Conv2dTestImpl.hpp
+    layerTests/ConvertFp16ToFp32TestImpl.cpp
+    layerTests/ConvertFp16ToFp32TestImpl.hpp
+    layerTests/ConvertFp32ToFp16TestImpl.cpp
+    layerTests/ConvertFp32ToFp16TestImpl.hpp
+    layerTests/DebugTestImpl.cpp
+    layerTests/DebugTestImpl.hpp
+    layerTests/DequantizeTestImpl.cpp
+    layerTests/DequantizeTestImpl.hpp
+    layerTests/DetectionPostProcessTestImpl.hpp
     layerTests/DivisionTestImpl.cpp
     layerTests/DivisionTestImpl.hpp
     layerTests/ElementwiseTestImpl.hpp
     layerTests/EqualTestImpl.cpp
     layerTests/EqualTestImpl.hpp
+    layerTests/FakeQuantizationTestImpl.cpp
+    layerTests/FakeQuantizationTestImpl.hpp
+    layerTests/FloorTestImpl.cpp
+    layerTests/FloorTestImpl.hpp
+    layerTests/FullyConnectedTestImpl.cpp
+    layerTests/FullyConnectedTestImpl.hpp
+    layerTests/GatherTestImpl.cpp
+    layerTests/GatherTestImpl.hpp
     layerTests/GreaterTestImpl.cpp
     layerTests/GreaterTestImpl.hpp
+    layerTests/L2NormalizationTestImpl.cpp
+    layerTests/L2NormalizationTestImpl.hpp
     layerTests/LayerTestResult.hpp
+    layerTests/LstmTestImpl.cpp
+    layerTests/LstmTestImpl.hpp
     layerTests/MaximumTestImpl.cpp
     layerTests/MaximumTestImpl.hpp
+    layerTests/MeanTestImpl.hpp
     layerTests/MinimumTestImpl.cpp
     layerTests/MinimumTestImpl.hpp
     layerTests/MultiplicationTestImpl.cpp
     layerTests/MultiplicationTestImpl.hpp
+    layerTests/NormalizationTestImpl.cpp
+    layerTests/NormalizationTestImpl.hpp
+    layerTests/PadTestImpl.cpp
+    layerTests/PadTestImpl.hpp
+    layerTests/PermuteTestImpl.hpp
+    layerTests/Pooling2dTestImpl.cpp
+    layerTests/Pooling2dTestImpl.hpp
+    layerTests/PreluTestImpl.hpp
+    layerTests/QuantizeTestImpl.cpp
+    layerTests/QuantizeTestImpl.hpp
+    layerTests/ReshapeTestImpl.cpp
+    layerTests/ReshapeTestImpl.hpp
+    layerTests/ResizeTestImpl.hpp
+    layerTests/RsqrtTestImpl.cpp
+    layerTests/RsqrtTestImpl.hpp
+    layerTests/SoftmaxTestImpl.cpp
+    layerTests/SoftmaxTestImpl.hpp
+    layerTests/SpaceToBatchNdTestImpl.cpp
+    layerTests/SpaceToBatchNdTestImpl.hpp
+    layerTests/SpaceToDepthTestImpl.cpp
+    layerTests/SpaceToDepthTestImpl.hpp
+    layerTests/SplitterTestImpl.cpp
+    layerTests/SplitterTestImpl.hpp
+    layerTests/StackTestImpl.hpp
+    layerTests/StridedSliceTestImpl.cpp
+    layerTests/StridedSliceTestImpl.hpp
     layerTests/SubtractionTestImpl.cpp
     layerTests/SubtractionTestImpl.hpp
+    layerTests/TransposeConvolution2dTestImpl.hpp
 )
 
 if (ARMNNREF)
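
The deleted Conv2dTestImpl.hpp below carries ApplyBias, which dequantizes each output value and the bias, adds them in float, and requantizes the sum. A minimal sketch of that dequantize-add-requantize step using the affine scheme these tests rely on (helper names hypothetical; the real code uses SelectiveDequantize/SelectiveQuantize):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Affine (de)quantization: real = scale * (quantized - offset).
    float Dequantize(uint8_t q, float scale, int32_t offset)
    {
        return scale * static_cast<float>(static_cast<int32_t>(q) - offset);
    }

    uint8_t Quantize(float r, float scale, int32_t offset)
    {
        int32_t q = static_cast<int32_t>(std::round(r / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, q))); // clamp to uint8
    }

    // Bias application on one quantized output value, mirroring ApplyBias:
    // dequantize both operands, add in float, requantize into the output's
    // scheme. Note the bias tensor uses scale qScale * qScale with offset 0,
    // as set up in SimpleConvolution2dTestImpl below.
    uint8_t AddBias(uint8_t out, float outScale, int32_t outOffset,
                    int32_t bias, float biasScale)
    {
        float sum = Dequantize(out, outScale, outOffset)
                    + biasScale * static_cast<float>(bias);
        return Quantize(sum, outScale, outOffset);
    }
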
diff --git a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
deleted file mode 100644
index 98e5090..0000000
+++ /dev/null
@@ -1,1319 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "WorkloadTestUtils.hpp"
-#include "TensorUtils.hpp"
-#include <ResolveType.hpp>
-
-#include <Permute.hpp>
-#include <DataLayoutIndexed.hpp>
-
-#include <test/TensorHelpers.hpp>
-
-#include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
-
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-#include <backendsCommon/test/QuantizeHelper.hpp>
-
-#include <boost/numeric/conversion/cast.hpp>
-
-#include <string>
-
-// Mapping from input type to bias type for fully connected layers.
-// float => float, uint8_t => int32_t
-template<typename T>
-struct FullyConnectedBiasTypeForInputType;
-
-template<>
-struct FullyConnectedBiasTypeForInputType<float>
-{
-    using Type = float;
-};
-
-template<>
-struct FullyConnectedBiasTypeForInputType<uint8_t>
-{
-    using Type = int32_t;
-};
-
-// Modifies a std::vector in-place using a specified bias.
-template<typename T, typename B>
-void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
-    const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
-{
-    BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
-                     "Invalid type and parameter combination.");
-    BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
-                     "Invalid type and parameter combination.");
-
-    // Note we need to dequantize and re-quantize the image value and the bias.
-    for (uint32_t i = 0; i < bias.size(); ++i)
-    {
-        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
-        for (uint32_t y = 0; y < h; ++y)
-        {
-            for (uint32_t x = 0; x < w; ++x)
-            {
-                uint32_t offset = (i * h + y) * w + x;
-                BOOST_ASSERT(offset < v.size());
-                T& outRef = v[offset];
-                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
-                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
-            }
-        }
-    }
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
-LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const boost::multi_array<T, 4>& originalInput,
-    const boost::multi_array<T, 4>& originalKernel,
-    const boost::multi_array<B, 1>& bias,
-    const boost::multi_array<T, 4>& originalOutputExpected,
-    float qScale,
-    int32_t qOffset,
-    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
-    uint32_t padLeft = 0,
-    uint32_t padTop = 0,
-    uint32_t padRight = 0,
-    uint32_t padBottom = 0,
-    uint32_t strideX = 1,
-    uint32_t strideY = 1,
-    uint32_t dilationX = 1,
-    uint32_t dilationY = 1)
-{
-    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
-    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
-    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
-    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
-
-    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
-    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
-    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
-    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
-
-    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
-    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
-    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
-    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
-
-    bool biasEnabled = bias.size() > 0;
-
-    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
-    BOOST_ASSERT(inputNum == 1);
-    BOOST_ASSERT(outputNum == 1);
-
-    // If a bias is used, its size must equal the number of output channels.
-    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
-
-
-    // Note these tensors will use two (identical) batches.
-    armnn::TensorInfo inputTensorInfo =
-            armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo =
-            armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
-    armnn::TensorInfo kernelDesc =
-            armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
-    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        kernelDesc.SetQuantizationScale(qScale);
-        kernelDesc.SetQuantizationOffset(qOffset);
-        biasDesc.SetQuantizationScale(qScale*qScale);
-        biasDesc.SetQuantizationOffset(0);
-    }
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-
-    // Construct input data - two batches of the same input image.
-    std::vector<T> inputImage;
-    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
-    std::vector<T> inputData;
-    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
-    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
-
-    // at this point if we require it permute the input data
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-    }
-
-    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
-
-    std::vector<T> outputImage;
-    outputImage.assign(originalOutputExpected.data(),
-            originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
-
-    // Apply bias to output image if it is enabled.
-    if(biasEnabled)
-    {
-        std::vector<T> biasV;
-        biasV.assign(bias.data(), bias.data() + outputChannels);
-        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
-            outputWidth, outputHeight);
-    }
-
-    // Construct expected output data - two identical images.
-    std::vector<T> outputData;
-    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
-    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
-
-    // at this point if we require it permute the expected output
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
-        outputData = tmp;
-    }
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::Convolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-    // Permute the kernel if necessary
-    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
-    }
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-
-    if(biasEnabled)
-    {
-        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-    }
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
-    data.m_Parameters.m_StrideX = strideX;
-    data.m_Parameters.m_StrideY = strideY;
-    data.m_Parameters.m_PadLeft = padLeft;
-    data.m_Parameters.m_PadRight = padRight;
-    data.m_Parameters.m_PadTop = padTop;
-    data.m_Parameters.m_PadBottom = padBottom;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_DataLayout = layout;
-    data.m_Parameters.m_DilationX = dilationX;
-    data.m_Parameters.m_DilationY = dilationY;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
-LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const boost::multi_array<T, 4>& input,
-    const boost::multi_array<T, 4>& kernel,
-    const boost::multi_array<B, 1>& bias,
-    const boost::multi_array<T, 4>& outputExpected,
-    const armnn::DataLayout dataLayout,
-    float qScale,
-    int32_t qOffset,
-    uint32_t padLeft = 1,
-    uint32_t padTop = 1,
-    uint32_t padRight = 1,
-    uint32_t padBottom = 1,
-    uint32_t strideX  = 1,
-    uint32_t strideY  = 1)
-{
-    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
-    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[3]);
-    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[1]);
-    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[2]);
-
-    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
-    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
-    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
-    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
-
-    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
-    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
-    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
-    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
-
-    bool biasEnabled = bias.size() > 0;
-
-    // Creates the tensors.
-    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
-                                       ArmnnType);
-    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
-    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
-
-    // Construct the input data.
-    std::vector<T> inputData;
-    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
-    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
-
-    // Construct the output data, with bias applied, as appropriate.
-    std::vector<T> outputData;
-    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
-    armnn::Convolution2dQueueDescriptor data;
-
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
-    data.m_Parameters.m_StrideX = strideX;
-    data.m_Parameters.m_StrideY = strideY;
-    data.m_Parameters.m_PadLeft = padLeft;
-    data.m_Parameters.m_PadRight = padRight;
-    data.m_Parameters.m_PadTop = padTop;
-    data.m_Parameters.m_PadBottom = padBottom;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_DataLayout = dataLayout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const boost::multi_array<T, 4>& input,
-    const boost::multi_array<T, 4>& kernel,
-    const boost::multi_array<B, 1>& bias,
-    const boost::multi_array<T, 4>& outputExpected,
-    float qScale,
-    int32_t qOffset,
-    const armnn::DataLayout layout,
-    uint32_t padLeft = 0,
-    uint32_t padTop = 0,
-    uint32_t padRight = 0,
-    uint32_t padBottom = 0,
-    uint32_t strideX = 1,
-    uint32_t strideY = 1)
-{
-    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
-    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[1]);
-    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[2]);
-    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[3]);
-    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
-    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
-    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
-    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
-    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
-    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
-    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
-    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
-
-    // If a bias is used, its size must equal the number of output channels.
-    bool biasEnabled = bias.size() > 0;
-    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
-
-    // Creates the tensors.
-    armnn::TensorInfo inputTensorInfo =
-            armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo =
-            armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
-    armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
-    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        kernelDesc.SetQuantizationScale(qScale);
-        kernelDesc.SetQuantizationOffset(qOffset);
-        biasDesc.SetQuantizationScale(qScale*qScale);
-        biasDesc.SetQuantizationOffset(0);
-    }
-
-    // Construct the input data.
-    std::vector<T> inputData;
-    inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
-
-    // At this point if we require it permute the input data
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-    }
-
-    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
-
-    // Construct the output data, with bias applied, as appropriate.
-    std::vector<T> outputData;
-    outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
-    if (biasEnabled)
-    {
-        std::vector<T> biasV;
-        biasV.assign(bias.data(), bias.data() + outputChannels);
-        ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
-            outputWidth, outputHeight);
-    }
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-
-    // At this point if we require it permute the expected output
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
-        outputData = tmp;
-    }
-
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-    if (biasEnabled)
-    {
-        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-    }
-
-    armnn::DepthwiseConvolution2dQueueDescriptor data;
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
-    data.m_Parameters.m_StrideX = strideX;
-    data.m_Parameters.m_StrideY = strideY;
-    data.m_Parameters.m_PadLeft = padLeft;
-    data.m_Parameters.m_PadRight = padRight;
-    data.m_Parameters.m_PadTop = padTop;
-    data.m_Parameters.m_PadBottom = padBottom;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_DataLayout = layout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    using B = armnn::ResolveType<ArmnnBType>;
-
-    unsigned int inputHeight = 3;
-    unsigned int inputWidth = 3;
-    unsigned int inputChannels = 2;
-    unsigned int inputNum = 1;
-
-    unsigned int kernelHeight = 3;
-    unsigned int kernelWidth = 3;
-    unsigned int kernelChannels = inputChannels;
-    unsigned int kernelDepthMultiplier = 1;
-
-    unsigned int outputHeight = 1;
-    unsigned int outputWidth = 1;
-    unsigned int outputChannels = kernelChannels;
-    unsigned int outputNum = inputNum;
-
-    armnn::TensorInfo inputTensorInfo =
-            armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo =
-            armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
-    armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
-                                 ArmnnType);
-    armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        kernelDesc.SetQuantizationScale(qScale);
-        kernelDesc.SetQuantizationOffset(qOffset);
-        biasDesc.SetQuantizationScale(qScale*qScale);
-        biasDesc.SetQuantizationOffset(0);
-    }
-    std::vector<T> inputData = std::vector<T>(
-            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
-                    1.f, 2.f, 1.f,
-                    2.f, 1.f, 2.f,
-                    1.f, 2.f, 1.f,
-
-                    1.f, 2.f, 1.f,
-                    2.f, 1.f, 2.f,
-                    1.f, 2.f, 1.f,
-            }));
-    // at this point if we require it permute the input data
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-    }
-    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
-
-    std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
-                                            {0, 2}));
-    auto bias = MakeTensor<B, 1>(biasDesc, biasV);
-
-    std::vector<T> kernelData = std::vector<T>(
-            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
-                    1.f, 0.f,  1.f,
-                    0.f, 0.f,  0.f,
-                    -1.f, 0.f, -1.f,
-
-                    1.f, 0.f,  1.f,
-                    0.f, 0.f,  0.f,
-                    -1.f, 0.f, -1.f,
-            }));
-    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
-
-    // Manually calculated.
-    std::vector<T> outputImage(
-        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                           outputTensorInfo.GetQuantizationOffset(),
-                           {0.f, 0.f})
-    );
-
-    // Optionally apply bias to output image.
-    if(biasEnabled)
-    {
-        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
-                  outputWidth, outputHeight);
-    }
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(outputImage.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
-        outputImage = tmp;
-    }
-
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::DepthwiseConvolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
-    data.m_Parameters.m_StrideX = 1;
-    data.m_Parameters.m_StrideY = 1;
-    data.m_Parameters.m_PadLeft = 0;
-    data.m_Parameters.m_PadRight = 0;
-    data.m_Parameters.m_PadTop = 0;
-    data.m_Parameters.m_PadBottom = 0;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_DataLayout = layout;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    using B = armnn::ResolveType<ArmnnBType>;
-
-    unsigned int depthMultiplier = 2;
-
-    unsigned int inputHeight    = 8;
-    unsigned int inputWidth     = 16;
-    unsigned int inputChannels  = 2;
-    unsigned int inputBatchSize = 1;
-
-    unsigned int kernelHeight = 5;
-    unsigned int kernelWidth  = 3;
-
-    unsigned int outputHeight    = inputHeight - kernelHeight + 1 + 2;
-    unsigned int outputWidth     = (inputWidth - kernelWidth + 1)/2;
-    unsigned int outputChannels  = inputChannels * depthMultiplier;
-    unsigned int outputBatchSize = inputBatchSize;
-
-    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
-            inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
-            outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
-    armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
-                                 ArmnnType);
-    armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        kernelDesc.SetQuantizationScale(qScale);
-        kernelDesc.SetQuantizationOffset(qOffset);
-        biasDesc.SetQuantizationScale(qScale*qScale);
-        biasDesc.SetQuantizationOffset(0);
-    }
-
-    // NOTE: originalInputData is in NCHW format
-    std::vector<T> originalInputData = std::vector<T>(
-            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-            }));
-    std::vector<T> inputData = originalInputData;
-    // at this point if we require it permute the input data
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
-                            originalInputData.data(), inputData.data(), sizeof(T));
-    }
-    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
-
-    std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
-        {0, 2, 1, -1}));
-    auto bias = MakeTensor<B, 1>(biasDesc, biasV);
-
-    std::vector<T> kernelData = std::vector<T>(
-            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
-                    1, 1, 1,
-                    1, -1, 1,
-                    1, 1, 1,
-                    1, 1, 1,
-                    1, 1, 1,
-
-                    2, 2, 2,
-                    2, 2, 2,
-                    2, 2, 2,
-                    2, 2, 2,
-                    2, 2, 2,
-
-                    0, 0, 0,
-                    0, -1, 0,
-                    0, 0, 0,
-                    0, 0, 0,
-                    0, 0, 0,
-
-                    0, 0, 0,
-                    0, 0, 0,
-                    0, 1, 0,
-                    0, 0, 0,
-                    0, 0, 0
-
-            }));
-    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
-
-    // Manually calculated.
-    std::vector<T> originalOutputImage = std::vector<T>(
-        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
-            3.5f,  3.5f,  3.5f,  3.5f,  3.5f,  3.5f,  3.5f,
-            6.0f,  6.0f,  6.0f,  6.0f,  6.0f,  6.0f,  6.0f,
-            5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,
-            6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,
-            6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,
-            5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,
-
-            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
-            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
-            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
-            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
-
-            8.0f,  8.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            8.0f,  8.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
-            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f
-        }));
-
-    // Optionally apply bias to output image.
-    if(biasEnabled)
-    {
-        ApplyBias(originalOutputImage,
-                  outputTensorInfo.GetQuantizationScale(),
-                  outputTensorInfo.GetQuantizationOffset(),
-                  biasV,
-                  biasDesc.GetQuantizationScale(),
-                  biasDesc.GetQuantizationOffset(),
-                  outputWidth,
-                  outputHeight);
-    }
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-    std::vector<T> outputImage = originalOutputImage;
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
-                            originalOutputImage.data(), outputImage.data(), sizeof(T));
-    }
-
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::DepthwiseConvolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
-    data.m_Parameters.m_StrideX = 2;
-    data.m_Parameters.m_StrideY = 1;
-    data.m_Parameters.m_PadLeft = 0;
-    data.m_Parameters.m_PadRight = 0;
-    data.m_Parameters.m_PadTop = 1;
-    data.m_Parameters.m_PadBottom = 1;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_DataLayout = layout;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
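
A note on the permutation convention used above: in armnnUtils::Permute the first argument is the destination shape, and the mapping gives the destination dimension of each source dimension, so { 0, 3, 1, 2 } sends N->0, C->3, H->1, W->2, i.e. NCHW to NHWC. A minimal standalone sketch, assuming the Permute signature used throughout this file:

    #include "Permute.hpp"
    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>
    #include <vector>

    void PermuteNchwToNhwcExample()
    {
        // Source dim i moves to destination dim mappings[i].
        const armnn::PermutationVector nchwToNhwc = { 0, 3, 1, 2 };
        const armnn::TensorShape nhwcShape({ 1, 1, 2, 3 });  // N H W C, from 1x3x1x2 NCHW
        std::vector<float> nchw = { 1, 2, 3, 4, 5, 6 };      // channel-major source data
        std::vector<float> nhwc(nchw.size());
        armnnUtils::Permute(nhwcShape, nchwToNhwc, nchw.data(), nhwc.data(), sizeof(float));
        // nhwc now holds { 1, 3, 5, 2, 4, 6 }: channels became the fastest-moving index.
    }
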
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-        typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const boost::multi_array<T, 4>& originalInput,
-    const boost::multi_array<T, 4>& originalKernel,
-    const boost::multi_array<B, 1>& bias,
-    const boost::multi_array<T, 4>& originalOutputExpected,
-    float qScale,
-    int32_t qOffset,
-    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
-    uint32_t padLeft = 0,
-    uint32_t padTop = 0,
-    uint32_t padRight = 0,
-    uint32_t padBottom = 0,
-    uint32_t strideX = 1,
-    uint32_t strideY = 1,
-    uint32_t dilationX = 1,
-    uint32_t dilationY = 1)
-{
-    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
-    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
-    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
-    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
-
-    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
-    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
-    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
-    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
-
-    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
-    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
-    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
-    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
-
-    bool biasEnabled = bias.size() > 0;
-
-    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
-    BOOST_ASSERT(inputNum == 1);
-    BOOST_ASSERT(outputNum == 1);
-
-    // If a bias is used, its size must equal the number of output channels.
-    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
-
-    // Note these tensors will use two (identical) batches.
-    armnn::TensorInfo inputTensorInfo =
-            armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo =
-            armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
-
-    // For depthwise convolution, the kernel is always in NCHW layout, regardless of the input/output layout.
-    armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
-
-    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-        kernelDesc.SetQuantizationScale(qScale);
-        kernelDesc.SetQuantizationOffset(qOffset);
-        biasDesc.SetQuantizationScale(qScale*qScale);
-        biasDesc.SetQuantizationOffset(0);
-    }
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-
-    // Construct input data
-    std::vector<T> input;
-    input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
-    std::vector<T> inputData;
-    inputData.insert(inputData.end(), input.begin(), input.end());
-    inputData.insert(inputData.end(), input.begin(), input.end());
-
-    // If the requested layout is NHWC, permute the input data at this point.
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-    }
-
-    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
-
-    std::vector<T> output;
-    output.assign(originalOutputExpected.data(),
-                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
-
-    // Apply bias to output data if it is enabled.
-    if(biasEnabled)
-    {
-        std::vector<T> biasV;
-        biasV.assign(bias.data(), bias.data() + outputChannels);
-        ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
-                  outputWidth, outputHeight);
-    }
-
-    // Construct expected output data
-    std::vector<T> outputData;
-    outputData.insert(outputData.end(), output.begin(), output.end());
-    outputData.insert(outputData.end(), output.begin(), output.end());
-
-    // If the requested layout is NHWC, permute the expected output at this point.
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
-        outputData = tmp;
-    }
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::DepthwiseConvolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
-    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-
-    if(biasEnabled)
-    {
-        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-    }
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
-    data.m_Parameters.m_StrideX = strideX;
-    data.m_Parameters.m_StrideY = strideY;
-    data.m_Parameters.m_PadLeft = padLeft;
-    data.m_Parameters.m_PadRight = padRight;
-    data.m_Parameters.m_PadTop = padTop;
-    data.m_Parameters.m_PadBottom = padBottom;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_DataLayout = layout;
-    data.m_Parameters.m_DilationX = dilationX;
-    data.m_Parameters.m_DilationY = dilationY;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
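
A note on the quantization parameters: both depthwise helpers above follow the usual fixed-point convention that the bias scale equals the product of the input and weight scales. These tests quantize input and weights with the same qScale, so

    bias_scale = input_scale * weight_scale = qScale * qScale

which is why biasDesc is given a scale of qScale*qScale and an offset of 0.
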
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T,4> Convolution1dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled)
-{
-    using B = armnn::ResolveType<ArmnnBType>;
-    // Until we have a specialist 1D convolution layer, we can fake one using
-    // 2D convolution with the final dimension set to 1.
-    // I don't anticipate this being particularly slow, given that convolution is implemented
-    // as a matrix multiplication, at which point dimension doesn't matter.
-
-    unsigned int batchSize      = 1;
-    unsigned int inputChannels  = 2;
-    unsigned int outputChannels = 3;
-    unsigned int inputSize      = 5; // The 1D size (could view as 'width' or 'height').
-    unsigned int kernelSize     = 3;
-    unsigned int padSize        = 2;
-    unsigned int stride         = 1;
-    unsigned int outputSize     = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
-
-    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
-    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
-    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
-    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputInfo.SetQuantizationScale(qScale);
-        inputInfo.SetQuantizationOffset(qOffset);
-        outputInfo.SetQuantizationScale(qScale);
-        outputInfo.SetQuantizationOffset(qOffset);
-        kernelInfo.SetQuantizationScale(qScale);
-        kernelInfo.SetQuantizationOffset(qOffset);
-        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
-        biasInfo.SetQuantizationOffset(0);
-    }
-
-    std::vector<T> inputData(
-        QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), {
-            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
-            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
-        }));
-
-    std::vector<T> kernelData(
-        QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), {
-            1.0f, 0.0f, 0.0f,
-            0.0f, 2.0f, -1.5f,
-
-            0.0f, 0.0f, 0.0f,
-            0.2f, 0.2f, 0.2f,
-
-            0.5f, 0.0f, 0.5f,
-            0.0f, -1.0f, 0.0f
-        }));
-
-    std::vector<B> biasData(
-        QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), {
-            1.0f, 0.0f, 0.0f
-        }));
-
-    std::vector<T> outputData(
-        QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), {
-            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f - 3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
-            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
-            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
-        }));
-
-    // Optionally apply bias to output image.
-    if(biasEnabled)
-    {
-        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
-            biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
-            1, outputSize);
-    }
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
-
-    armnn::Convolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle         weightsTensor(kernelInfo);
-    armnn::ScopedCpuTensorHandle         biasTensor(biasInfo);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
-    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
-
-    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
-
-    data.m_Weight         = &weightsTensor;
-    data.m_Bias           = &biasTensor;
-    data.m_Parameters.m_StrideX        = 1;
-    data.m_Parameters.m_StrideY        = stride;
-    data.m_Parameters.m_PadLeft        = 0;
-    data.m_Parameters.m_PadRight       = 0;
-    data.m_Parameters.m_PadTop         = padSize;
-    data.m_Parameters.m_PadBottom      = padSize;
-    data.m_Parameters.m_BiasEnabled    = biasEnabled;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    // Output
-    LayerTestResult<T,4> ret(outputInfo);
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-    ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
-    return ret;
-}
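
The faked-1D approach is purely a matter of descriptor shapes; the helper below is an illustrative sketch (not part of the test suite) spelling out the mapping used above:

    // Illustrative only: a [N, C, L] 1D convolution is run as a 2D
    // convolution over [N, C, L, 1]. Height carries the 1D dimension,
    // width is fixed at 1, and the 1D stride/pad become strideY/padY.
    armnn::TensorInfo MakeFake1dTensorInfo(unsigned int batch,
                                           unsigned int channels,
                                           unsigned int length,
                                           armnn::DataType type)
    {
        return armnn::TensorInfo({ batch, channels, length, 1 }, type);
    }
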
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T,4> CompareConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory)
-{
-    unsigned int inputHeight   = 8;
-    unsigned int inputWidth    = 16;
-    unsigned int inputChannels = 3;
-    unsigned int inputNum      = 5;
-
-    unsigned int kernelHeight = 3;
-    unsigned int kernelWidth  = 3;
-
-    unsigned int strideX = 2;
-    unsigned int strideY = 3;
-    unsigned int padX    = 1;
-    unsigned int padY    = 1;
-
-    unsigned int outputNum      = inputNum;
-    unsigned int outputChannels = 2;
-    unsigned int outputHeight   = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
-    unsigned int outputWidth    = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
-
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-    armnn::TensorInfo kernelDesc;
-    armnn::TensorInfo biasDesc;
-
-    unsigned int inputShape[]  = {inputNum, inputChannels, inputHeight, inputWidth};
-    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
-    unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
-    unsigned int biasShape[]   = {outputChannels};
-
-    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
-    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
-    kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
-    biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
-
-    LayerTestResult<T,4> ret(outputTensorInfo);
-
-    auto input  = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
-    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
-    auto bias   = MakeRandomTensor<T, 1>(biasDesc, 1028);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::Convolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor;
-    data.m_Parameters.m_StrideX = strideX;
-    data.m_Parameters.m_StrideY = strideY;
-    data.m_Parameters.m_PadLeft = padX;
-    data.m_Parameters.m_PadRight = padX;
-    data.m_Parameters.m_PadTop = padY;
-    data.m_Parameters.m_PadBottom = padY;
-    data.m_Parameters.m_BiasEnabled = true;
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
-
-    armnn::Convolution2dQueueDescriptor refData = data;
-    armnn::WorkloadInfo               refInfo = info;
-    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
-    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
-
-    std::unique_ptr<armnn::IWorkload> workload  = workloadFactory.CreateConvolution2d(data, info);
-    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
-
-    outputHandleRef->Allocate();
-    inputHandleRef->Allocate();
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    workloadRef->PostAllocationConfigure();
-    workloadRef->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
-
-    return ret;
-}
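
With the values above, the output size expressions evaluate (integer division) as

    outputHeight = (8 + 2*1 - 3 + 3) / 3 = 10 / 3 = 3
    outputWidth  = (16 + 2*1 - 3 + 2) / 2 = 17 / 2 = 8

so the compared workloads both produce a 5x2x3x8 NCHW output. The depthwise comparison below uses the same numbers.
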
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    const armnnUtils::DataLayoutIndexed& layout)
-{
-    unsigned int inputHeight = 8;
-    unsigned int inputWidth = 16;
-    unsigned int inputChannels = 3;
-    unsigned int inputNum = 5;
-
-    unsigned int kernelHeight = 3;
-    unsigned int kernelWidth = 3;
-    unsigned int channelMultiplier = 1;
-
-    unsigned int strideX = 2;
-    unsigned int strideY = 3;
-    unsigned int padX = 1;
-    unsigned int padY = 1;
-
-    unsigned int outputNum = inputNum;
-    unsigned int outputChannels = inputChannels * channelMultiplier;
-    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
-    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
-
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-    armnn::TensorInfo kernelDesc;
-    armnn::TensorInfo biasDesc;
-
-    std::vector<unsigned int> inputShape;
-    std::vector<unsigned int> outputShape;
-    std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
-    std::vector<unsigned int> biasShape{ outputChannels };
-    switch (layout.GetDataLayout())
-    {
-        case armnn::DataLayout::NCHW:
-            inputShape =  { inputNum, inputChannels, inputHeight, inputWidth };
-            outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
-            break;
-        case armnn::DataLayout::NHWC:
-            inputShape =  { inputNum, inputHeight, inputWidth, inputChannels };
-            outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
-            break;
-        default:
-            throw armnn::InvalidArgumentException("unknown data layout ["
-                                                  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
-    }
-
-    float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
-    float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
-    int32_t qOffset = 0;
-
-    inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
-    outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
-    kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
-    biasDesc = armnn::TensorInfo(
-        1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-
-    auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
-    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
-    auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
-            biasDesc, 1028, 0.0f, 255.0f);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::DepthwiseConvolution2dQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
-    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor;
-    data.m_Parameters.m_StrideX = strideX;
-    data.m_Parameters.m_StrideY = strideY;
-    data.m_Parameters.m_PadLeft = padX;
-    data.m_Parameters.m_PadRight = padX;
-    data.m_Parameters.m_PadTop = padY;
-    data.m_Parameters.m_PadBottom = padY;
-    data.m_Parameters.m_BiasEnabled = true;
-    data.m_Parameters.m_DataLayout = layout.GetDataLayout();
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
-
-    armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
-    armnn::WorkloadInfo refInfo = info;
-    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
-    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
-    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
-
-    outputHandleRef->Allocate();
-    inputHandleRef->Allocate();
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    workloadRef->PostAllocationConfigure();
-    workloadRef->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
-
-    return ret;
-}
diff --git a/src/backends/backendsCommon/test/DataTypeUtils.hpp b/src/backends/backendsCommon/test/DataTypeUtils.hpp
new file mode 100644 (file)
index 0000000..01c7bab
--- /dev/null
@@ -0,0 +1,46 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <reference/workloads/Encoders.hpp>
+
+#include <vector>
+
+// Utility template to convert a collection of values to the correct type
+template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+std::vector<T> ConvertToDataType(const std::vector<float>& input,
+                                 const armnn::TensorInfo& inputTensorInfo)
+{
+    std::vector<T> output(input.size());
+    auto outputTensorInfo = inputTensorInfo;
+    outputTensorInfo.SetDataType(ArmnnType);
+
+    std::unique_ptr<armnn::Encoder<float>> pOutputEncoder = armnn::MakeEncoder<float>(outputTensorInfo, output.data());
+    armnn::Encoder<float>& rOutputEncoder = *pOutputEncoder;
+
+    for (auto it = input.begin(); it != input.end(); ++it)
+    {
+        rOutputEncoder.Set(*it);
+        ++rOutputEncoder;
+    }
+    return output;
+}
+
+// Utility template to convert a single value to the correct type
+template <typename T>
+T ConvertToDataType(const float& value,
+                    const armnn::TensorInfo& tensorInfo)
+{
+    std::vector<T> output(1);
+    std::unique_ptr<armnn::Encoder<float>> pEncoder = armnn::MakeEncoder<float>(tensorInfo, output.data());
+    armnn::Encoder<float>& rEncoder = *pEncoder;
+    rEncoder.Set(value);
+    return output[0];
+}
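
A usage sketch for the new utility, given the includes above; the TensorInfo values here are illustrative and assume the reference encoders' usual round-to-nearest asymmetric quantization:

    // Quantize four floats to QAsymm8 via the encoder-backed conversion.
    armnn::TensorInfo info({ 4 }, armnn::DataType::QuantisedAsymm8, 0.5f, 10);
    std::vector<float> floats = { 0.0f, 0.5f, 1.0f, 1.5f };
    std::vector<uint8_t> quantized =
        ConvertToDataType<armnn::DataType::QuantisedAsymm8>(floats, info);
    // With scale 0.5 and offset 10, quantized == { 10, 11, 12, 13 }.
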
diff --git a/src/backends/backendsCommon/test/FullyConnectedTestImpl.hpp b/src/backends/backendsCommon/test/FullyConnectedTestImpl.hpp
deleted file mode 100644 (file)
index 402a3e6..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <ResolveType.hpp>
-#include "WorkloadTestUtils.hpp"
-#include <backendsCommon/IBackendInternal.hpp>
-
-LayerTestResult<float, 2> FullyConnectedFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    bool transposeWeights)
-{
-    unsigned int inputWidth = 1;
-    unsigned int inputHeight = 1;
-    unsigned int inputChannels = 5;
-    unsigned int inputNum = 2;
-
-    unsigned int outputChannels = 3;
-    unsigned int outputNum = 2;
-
-    // Define the tensor descriptors.
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-    armnn::TensorInfo weightsDesc;
-    armnn::TensorInfo biasesDesc;
-
-    unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth };
-    unsigned int outputShape[] = { outputNum, outputChannels };
-    unsigned int weightsShape[] = { inputChannels, outputChannels };
-    if (transposeWeights)
-    {
-        std::swap(weightsShape[0], weightsShape[1]);
-    }
-    unsigned int biasShape[] = { outputChannels };
-
-    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
-    outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32);
-    weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32);
-    biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32);
-
-    LayerTestResult<float, 2> result(outputTensorInfo);
-
-    boost::multi_array<float, 4> input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(
-        {
-            1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-
-            5.0f, 4.0f, 3.0f, 2.0f, 1.0f
-        })
-    );
-
-    boost::multi_array<float, 2> weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>(
-        {
-            .5f, 2.f, .5f,
-            .5f, 2.f, 1.f,
-            .5f, 2.f, 2.f,
-            .5f, 2.f, 3.f,
-            .5f, 2.f, 4.f
-        }));
-
-    if (transposeWeights)
-    {
-        weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>(
-        {
-            .5f, .5f, .5f, .5f, .5f,
-            2.f, 2.f, 2.f, 2.f, 2.f,
-            .5f, 1.f, 2.f, 3.f, 4.f
-        }));
-    }
-
-    std::vector<float> biasValues({0.f, 0.f, 0.f});
-    if (biasEnabled)
-    {
-        biasValues =  std::vector<float>({10.f, 20.f, 30.f});
-    }
-    boost::multi_array<float, 1> bias = MakeTensor<float, 1>(biasesDesc, biasValues);
-
-    result = SimpleFullyConnectedTestImpl<float>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo, outputTensorInfo,
-        weightsDesc, biasesDesc,
-        weights, bias, input,
-        biasEnabled, transposeWeights
-    );
-
-    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, std::vector<float>(
-        {
-            0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0],
-            2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1],
-            0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2],
-
-            2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0],
-            10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1],
-            2.5f + 4.0f + 6.0f + 6.f + 4.f   + biasValues[2]
-        })
-    );
-
-    return result;
-}
-
-//
-// ArmNN variant of the AndroidNN fully_connected_float_large test.
-//
-// Tests the fully connected layer with large values, optionally transposing weights.
-// Note this is templated for consistency, but the nature of this test makes it unlikely to be useful in Uint8 mode.
-//
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> FullyConnectedLargeTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool transposeWeights,
-    float qScale = 0.0f,
-    int32_t qOffset = 0)
-{
-    unsigned int inputWidth = 1;
-    unsigned int inputHeight = 1;
-    unsigned int inputChannels = 5;
-    unsigned int inputNum = 1;
-
-    unsigned int outputChannels = 1;
-    unsigned int outputNum = 1;
-
-    // Define the tensor descriptors.
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-    armnn::TensorInfo weightsDesc;
-    armnn::TensorInfo biasesDesc;
-
-    unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth };
-    unsigned int outputShape[] = { outputNum, outputChannels };
-    unsigned int weightsShape[] = { inputChannels, outputChannels };
-    if (transposeWeights)
-    {
-        std::swap(weightsShape[0], weightsShape[1]);
-    }
-
-    unsigned int biasShape[] = { outputChannels };
-
-    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
-    outputTensorInfo = armnn::TensorInfo(2, outputShape, ArmnnType);
-    weightsDesc = armnn::TensorInfo(2, weightsShape, ArmnnType);
-    biasesDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-    }
-
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputTensorInfo,
-        QuantizedVector<T>(qScale, qOffset, {
-            1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f,
-        })
-    );
-
-    boost::multi_array<T, 2> weights = MakeTensor<T, 2>(weightsDesc,
-        QuantizedVector<T>(qScale, qOffset, {
-            2.0f, 3.0f, 4.0f, 5.0f, 6.0f
-        })
-    );
-
-    std::vector<T> biasValues({900000.f});
-    boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasesDesc, biasValues);
-
-    result = SimpleFullyConnectedTestImpl<T>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo, outputTensorInfo,
-        weightsDesc, biasesDesc,
-        weights, bias, input,
-        true, transposeWeights
-    );
-
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
-        QuantizedVector<T>(qScale, qOffset, {
-            965432.0f,
-        })
-    );
-
-    return result;
-}
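
The single expected value is easy to verify by hand: 1*2 + 10*3 + 100*4 + 1000*5 + 10000*6 = 65432, plus the 900000 bias, gives 965432.
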
diff --git a/src/backends/backendsCommon/test/GatherTestImpl.hpp b/src/backends/backendsCommon/test/GatherTestImpl.hpp
deleted file mode 100644 (file)
index 8fbfeea..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "WorkloadTestUtils.hpp"
-
-#include <armnn/Types.hpp>
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-
-template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>,
-    unsigned int paramsDim, unsigned int indicesDim, unsigned int OutputDim>
-LayerTestResult<T, OutputDim> GatherTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::TensorInfo& paramsInfo,
-    const armnn::TensorInfo& indicesInfo,
-    const armnn::TensorInfo& outputInfo,
-    const std::vector<T>& paramsData,
-    const std::vector<int32_t>& indicesData,
-    const std::vector<T>& outputData)
-{
-    auto params = MakeTensor<T, paramsDim>(paramsInfo, paramsData);
-    auto indices = MakeTensor<int32_t, indicesDim>(indicesInfo, indicesData);
-
-    LayerTestResult<T, OutputDim> result(outputInfo);
-    result.outputExpected = MakeTensor<T, OutputDim>(outputInfo, outputData);
-
-    std::unique_ptr<armnn::ITensorHandle> paramsHandle = workloadFactory.CreateTensorHandle(paramsInfo);
-    std::unique_ptr<armnn::ITensorHandle> indicesHandle = workloadFactory.CreateTensorHandle(indicesInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
-
-    armnn::GatherQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data,  info, paramsInfo, paramsHandle.get());
-    AddInputToWorkload(data, info, indicesInfo, indicesHandle.get());
-    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateGather(data, info);
-
-    paramsHandle->Allocate();
-    indicesHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(paramsHandle.get(), params.origin());
-    CopyDataToITensorHandle(indicesHandle.get(), indices.origin());
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
-
-    return result;
-}
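
For reference, the semantics exercised here amount to indexing along axis 0 of params: each index selects one whole axis-0 slice. An illustrative standalone sketch (not part of the test suite):

    #include <cstdint>
    #include <vector>

    // Reference Gather along axis 0: the output holds indices.size()
    // slices of params, each sliceSize elements wide.
    template <typename T>
    std::vector<T> GatherAxis0(const std::vector<T>& params,
                               size_t sliceSize,
                               const std::vector<int32_t>& indices)
    {
        std::vector<T> output;
        output.reserve(indices.size() * sliceSize);
        for (int32_t index : indices)
        {
            const size_t base = static_cast<size_t>(index) * sliceSize;
            for (size_t j = 0; j < sliceSize; ++j)
            {
                output.push_back(params[base + j]);
            }
        }
        return output;
    }

With params { 1, ..., 10 }, sliceSize 2 and indices { 1, 3, 4 }, this reproduces the GatherMultiDimParams expectation { 3, 4, 7, 8, 9, 10 } below.
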
-
-template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 1> Gather1DParamsTestImpl(armnn::IWorkloadFactory& workloadFactory,
-                                             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo paramsInfo({ 8 }, ArmnnType);
-    armnn::TensorInfo indicesInfo({ 4 }, armnn::DataType::Signed32);
-    armnn::TensorInfo outputInfo({ 4 }, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        paramsInfo.SetQuantizationScale(1.0f);
-        paramsInfo.SetQuantizationOffset(1);
-        outputInfo.SetQuantizationScale(1.0f);
-        outputInfo.SetQuantizationOffset(1);
-    }
-    const std::vector<T> params = std::vector<T>({ 1, 2, 3, 4, 5, 6, 7, 8 });
-    const std::vector<int32_t> indices = std::vector<int32_t>({ 0, 2, 1, 5 });
-    const std::vector<T> expectedOutput = std::vector<T>({ 1, 3, 2, 6 });
-
-    return GatherTestImpl<ArmnnType, T, 1, 1, 1>(workloadFactory, memoryManager,
-                                                 paramsInfo, indicesInfo, outputInfo,
-                                                 params, indices, expectedOutput);
-}
-
-template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> GatherMultiDimParamsTestImpl(
-    armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo paramsInfo({ 5, 2 }, ArmnnType);
-    armnn::TensorInfo indicesInfo({ 3 }, armnn::DataType::Signed32);
-    armnn::TensorInfo outputInfo({ 3, 2 }, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        paramsInfo.SetQuantizationScale(1.0f);
-        paramsInfo.SetQuantizationOffset(1);
-        outputInfo.SetQuantizationScale(1.0f);
-        outputInfo.SetQuantizationOffset(1);
-    }
-
-    const std::vector<T> params = std::vector<T>({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
-    const std::vector<int32_t> indices = std::vector<int32_t>({ 1, 3, 4 });
-    const std::vector<T> expectedOutput = std::vector<T>({ 3, 4, 7, 8, 9, 10 });
-
-    return GatherTestImpl<ArmnnType, T, 2, 1, 2>(workloadFactory, memoryManager,
-                                                 paramsInfo, indicesInfo, outputInfo,
-                                                 params, indices, expectedOutput);
-}
-
-template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> GatherMultiDimParamsMultiDimIndicesTestImpl(
-    armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo paramsInfo({ 3, 2, 3}, ArmnnType);
-    armnn::TensorInfo indicesInfo({ 2, 3 }, armnn::DataType::Signed32);
-    armnn::TensorInfo outputInfo({ 2, 3, 2, 3 }, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        paramsInfo.SetQuantizationScale(1.0f);
-        paramsInfo.SetQuantizationOffset(1);
-        outputInfo.SetQuantizationScale(1.0f);
-        outputInfo.SetQuantizationOffset(1);
-    }
-
-    const std::vector<T> params = std::vector<T>({
-         1,  2,  3,
-         4,  5,  6,
-
-         7,  8,  9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18 });
-    const std::vector<int32_t> indices = std::vector<int32_t>({ 1, 2, 1, 2, 1, 0 });
-    const std::vector<T> expectedOutput = std::vector<T>({
-         7,  8,  9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-         7,  8,  9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18,
-         7,  8,  9,
-        10, 11, 12,
-         1,  2,  3,
-         4,  5,  6 });
-
-    return GatherTestImpl<ArmnnType, T, 3, 2, 4>(workloadFactory, memoryManager,
-                                                 paramsInfo, indicesInfo, outputInfo,
-                                                 params, indices, expectedOutput);
-}
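
A note on the multi-dimensional case above: the output shape is the indices shape followed by the params shape with its first axis dropped, i.e. { 2, 3 } followed by { 2, 3 } gives { 2, 3, 2, 3 }; each of the six indices still selects a whole axis-0 slice (a 2x3 block) of params.
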
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
deleted file mode 100644 (file)
index 2d71e60..0000000
+++ /dev/null
@@ -1,8043 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#include "LayerTests.hpp"
-#include "WorkloadTestUtils.hpp"
-#include "TensorUtils.hpp"
-#include <ResolveType.hpp>
-
-#include "test/TensorHelpers.hpp"
-#include "TensorCopyUtils.hpp"
-#include "Permute.hpp"
-
-#include <boost/test/unit_test.hpp>
-#include <boost/assert.hpp>
-
-#include <armnn/LayerSupport.hpp>
-
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-
-#include <algorithm>
-#include <boost/cast.hpp>
-
-#include "WorkloadTestUtils.hpp"
-#include "Conv2dTestImpl.hpp"
-#include "BatchNormTestImpl.hpp"
-#include "ActivationTestImpl.hpp"
-#include "Pooling2dTestImpl.hpp"
-#include "FullyConnectedTestImpl.hpp"
-#include "GatherTestImpl.hpp"
-#include "SpaceToBatchNdTestImpl.hpp"
-#include "SpaceToDepthTestImpl.hpp"
-#include "SplitterTestImpl.hpp"
-#include "SoftmaxTestImpl.hpp"
-#include "StridedSliceTestImpl.hpp"
-#include "NormTestImpl.hpp"
-#include "LstmTestImpl.hpp"
-#include "ConvertFp16ToFp32TestImpl.hpp"
-#include "ConvertFp32ToFp16TestImpl.hpp"
-#include "DebugTestImpl.hpp"
-#include "DequantizeTestImpl.hpp"
-#include "QuantizeTestImpl.hpp"
-#include "TransposeConvolution2dTestImpl.hpp"
-
-// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
-static std::vector<float> ConvInput3x8x16({
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-});
-
-// 2-channel bias used by a number of Conv2d tests.
-static std::vector<float> Bias2({0, 2});
-
-static std::vector<float> Bias4({1, 2, 3, 4});
-
-static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
-
-struct Simple3dSoftmaxOutputData
-{
-    const std::vector<float> outputData =
-            {
-                0.0964599f, 0.26220518f, 0.0964599f, 0.0964599f,
-                0.15903549f, 0.0964599f, 0.0964599f, 0.0964599f
-            };
-
-    const armnn::TensorShape inputShape{ 1, 8, 1 };
-
-    const std::vector<float> inputData =
-            {
-                    0.f, 1.f, 0.f, 0.f,
-                    .5f, 0.f, 0.f, 0.f,
-            };
-};
-
-struct Simple4dSoftmaxData
-{
-    const armnn::TensorShape inputShape{ 1, 8, 1, 1 };
-
-    const std::vector<float> outputData = { 0.0964599f, 0.26220518f, 0.0964599f, 0.0964599f,
-                                            0.15903549f, 0.0964599f, 0.0964599f, 0.0964599f };
-    const std::vector<float> inputData =
-            {
-                    0.f, 1.f, 0.f, 0.f,
-                    .5f, 0.f, 0.f, 0.f
-            };
-};
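
The softmax constants shared by both structs can be checked by hand, assuming beta = 1: the eight inputs contain six 0s, one 1 and one 0.5, so the denominator is 6*e^0 + e^1 + e^0.5 ≈ 6 + 2.71828 + 1.64872 ≈ 10.36700, and 1/10.367 ≈ 0.09646, 2.71828/10.367 ≈ 0.26221, 1.64872/10.367 ≈ 0.15904, matching 0.0964599f, 0.26220518f and 0.15903549f above.
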
-
-// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
-{
-    if(biasEnabled)
-    {
-        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
-        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias2));
-        return bias;
-    }
-    else
-    {
-        return boost::multi_array<T, 1>();
-    }
-}
-
-// Helper function that returns either Bias4 or an empty vector depending on whether bias is enabled.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
-{
-    if(biasEnabled)
-    {
-        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
-        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias4));
-        return bias;
-    }
-    else
-    {
-        return boost::multi_array<T, 1>();
-    }
-}
-
-// Helper function that returns either Bias8 or an empty vector depending on whether bias is enabled.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
-{
-    if(biasEnabled)
-    {
-        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
-        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias8));
-        return bias;
-    }
-    else
-    {
-        return boost::multi_array<T, 1>();
-    }
-}
-
-// Helper function that returns Bias2, Bias4 or Bias8, selected by the number of output channels,
-// or an empty vector if bias is disabled.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
-{
-    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
-    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
-
-    switch (outputChannels)
-    {
-        case 2:
-        default:
-        {
-            return GetBias2<ArmnnType>(biasEnabled, qScale);
-        }
-        case 4:
-        {
-            return GetBias4<ArmnnType>(biasEnabled, qScale);
-        }
-        case 8:
-        {
-            return GetBias8<ArmnnType>(biasEnabled, qScale);
-        }
-    }
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    // Use common single-batch 3-channel 16x8 image.
-    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
-
-    // Use a 2-element batch with 3-channel 3x5 kernels.
-    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
-    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            1, 1, 1,
-            1, -1, 1,
-            1, 1, 1,
-            1, 1, 1,
-            1, 1, 1,
-
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-
-            2, 2, 2,
-            2, 2, 2,
-            2, 2, 2,
-            2, 2, 2,
-            2, 2, 2,
-
-
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-
-            1, 1, 1,
-            1, 1, 1,
-            1, 1, 1,
-            1, 1, 1,
-            1, 1, 1,
-
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0
-        })));
-
-    // Expected output is 1 batch of a 2-channel 14x4 image.
-    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
-            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
-            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
-            -23.5f, -23.5f, -23.5f,
-            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
-            -23.5f, -23.5f, -23.5f,
-
-            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-        })));
-
-    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
-
-    // Use common single-batch 3-channel 16x8 image.
-    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
-
-    // Use a 2-element batch of 3-channel 3x3 kernels.
-    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
-    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            1, 1, 1,
-            1, -1, 1,
-            1, 1, 1,
-
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-
-            2, 2, 2,
-            2, 2, 2,
-            2, 2, 2,
-
-
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0,
-
-            1, 1, 1,
-            1, 1, 1,
-            1, 1, 1,
-
-            0, 0, 0,
-            0, 0, 0,
-            0, 0, 0
-        })));
-
-    // Expected output is 1 batch of a 2-channel 14x6 image.
-    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
-            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
-            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
-            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
-            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
-
-            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-        })));
-
-    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled,
-    armnn::DataLayout dataLayout)
-{
-    // Use a single-batch, 1-channel 4x3 image.
-
-    armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
-                                                      {
-                                                       1, 5, 2, 3,
-                                                       8, 7, 3, 6,
-                                                       3, 3, 9, 1
-                                                       });
-
-    // Use a single 1-channel 3x3 kernel.
-    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
-    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
-                                                                    4, 5, 6,
-                                                                    0, 0, 0,
-                                                                    3, 2, 1
-                                                                    });
-
-    // Expected output is 1 batch of a 1-channel 4x3 image.
-    armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
-
-    const std::vector<float> outputData =
-            {
-                    23, 41, 33, 21,
-                    44, 65, 76, 52,
-                    82, 85, 79, 42
-            };
-
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
-
-    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        boost::multi_array<T, 1>(),
-        expectedOutput,
-        dataLayout,
-        qScale,
-        qOffset);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float qScale,
-        int32_t qOffset,
-        bool biasEnabled,
-        const armnn::DataLayout& dataLayout)
-{
-    // Input is a single-batch, 1 channel, 5x5 image.
-    armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
-            {
-                1, 5, 2, 3, 5,
-                8, 7, 3, 6, 3,
-                3, 3, 9, 1, 9,
-                4, 1, 8, 1, 3,
-                6, 8, 1, 9, 2
-            });
-
-    // Use a 3x3 kernel.
-    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
-    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
-            {
-                4, 5, 6,
-                0, 0, 0,
-                3, 2, 1
-            });
-
-    // Expected output is a single-batch, 1 channel, 3x3 image.
-    armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
-
-    const std::vector<T> outputData =
-            {
-                23, 33, 24,
-                91, 99, 48,
-                26, 50, 19
-            };
-
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
-
-    uint32_t padLeft = 1;
-    uint32_t padTop = 1;
-    uint32_t padRight = 1;
-    uint32_t padBottom = 1;
-    uint32_t strideX  = 2;
-    uint32_t strideY  = 2;
-
-    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        boost::multi_array<T, 1>(),
-        expectedOutput,
-        dataLayout,
-        qScale,
-        qOffset,
-        padLeft,
-        padTop,
-        padRight,
-        padBottom,
-        strideX,
-        strideY);
-}
-
-LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
-}
-
-LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
-}
-
-LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled)
-{
-    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
-        workloadFactory,
-        memoryManager,
-        0.f,
-        0,
-        biasEnabled,
-        armnn::DataLayout::NHWC);
-}
-
-LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
-        workloadFactory,
-        memoryManager,
-        0.f,
-        0,
-        biasEnabled,
-        layout);
-}
-
-LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-            workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout,
-    float qScale,
-    int32_t qOffset)
-{
-    // Use a single-batch 1-channel 3x3 image as input.
-    armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            11,21,31,
-            12,22,32,
-            13,23,33
-        })));
-
-    // Use 1 batch of a 1-channel 2x2 kernel.
-    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
-    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            -11,-21,
-            -12,-22,
-        })));
-
-    // Expected output is 1 batch of a 1-channel 8x6 (HxW) image.
-    // Manually calculated like this:
-    //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
-    //[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
-    //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
-    //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
-    //[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
-    //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
-    //[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
-    armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-               0,    0,      0,    0,    0,    0,
-            -242,  -594,  -934, -372,    0,    0,
-            -495, -1190, -1850, -725,    0,    0,
-            -538, -1256, -1916, -748,    0,    0,
-            -273, -626,  -946,  -363,    0,    0,
-               0,    0,     0,     0,    0,    0,
-               0,    0,     0,     0,    0,    0,
-               0,    0,     0,     0,    0,    0
-        })));
-
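-    // Note: GetBias2 creates the (here disabled) bias tensor with scale qScale * qScale,
-    // following the usual affine-quantization convention biasScale = inputScale * weightScale.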
-    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(false, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout,
-        1,  // Padding left.
-        2,  // Padding top.
-        3,  // Padding right.
-        4); // Padding bottom.
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout,
-    float qScale,
-    int32_t qOffset)
-{
-    // Use a single-batch 1-channel 5x5 image as input.
-    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
-    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            11,21,31,41,51,
-            12,22,32,42,52,
-            13,23,33,43,53,
-            14,24,34,44,54,
-            15,25,35,45,55,
-        })));
-
-    // Use 1 batch of a 1-channel 4x4 kernel.
-    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
-    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            -11,-21,-31,-41,
-            -12,-22,-32,-42,
-            -13,-23,-33,-43,
-            -14,-24,-34,-44,
-        })));
-
-    // Expected output is 1 batch of a 1-channel 5x5 image.
-    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-            -7140, -10580, -13940,  -9300, -5230,
-            -9590, -14120, -18520, -12290, -6860,
-            -9980, -14560, -18960, -12560, -7000,
-            -7518, -10904, -14144,  -9318, -5152,
-            -5032,  -7256,  -9376,  -6142, -3368,
-        })));
-
-    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(false, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout,
-        1,  // Padding left.
-        1,  // Padding top.
-        2,  // Padding right.
-        2); // Padding bottom.
-}
-
-LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::DataLayout layout)
-{
-    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
-            workloadFactory, memoryManager, layout, 0.0f, 0);
-}
-
-LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::DataLayout layout)
-{
-    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
-            <armnn::DataType::Float32, armnn::DataType::Float32>(
-            workloadFactory, memoryManager, layout, 0.0f, 0);
-}
-
-LayerTestResult<float, 4> Convolution1dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled)
-{
-    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
-            workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
-}
-
-LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled)
-{
-    return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-            workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
-}
-
-LayerTestResult<float,4> CompareConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory)
-{
-    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, refWorkloadFactory);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const std::vector<float>& inputNoQuantizedValues,
-    armnn::TensorInfo& inputTensorInfo,
-    const std::vector<float>& kernelNoQuantizedValues,
-    armnn::TensorInfo& kernelTensorInfo,
-    const std::vector<float>& outputExpectedNoQuantizedValues,
-    armnn::TensorInfo& outputTensorInfo,
-    uint32_t dilationX,
-    uint32_t dilationY,
-    armnn::DataLayout layout = armnn::DataLayout::NCHW,
-    uint32_t padLeft = 0,
-    uint32_t padTop = 0,
-    uint32_t padRight = 0,
-    uint32_t padBottom = 0,
-    uint32_t strideX  = 1,
-    uint32_t strideY  = 1,
-    bool biasEnabled = false
-)
-{
-    float qScale;
-    int32_t qOffset;
-    switch (ArmnnType)
-    {
-        case armnn::DataType::QuantisedAsymm8:
-        {
-            qScale = 0.1f;
-            qOffset = 128;
-            break;
-        }
-        case armnn::DataType::QuantisedSymm16:
-        {
-            qScale = 0.1f;
-            qOffset = 0;
-            break;
-        }
-        case armnn::DataType::Float32:
-        default:
-        {
-            qScale = 0.f;
-            qOffset = 0;
-            break;
-        }
-    }
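-
-    // For the quantized data types, QuantizedVector maps each float v to
-    // round(v / qScale) + qOffset, clamped to the target type's range
-    // (e.g. with qScale 0.1 and qOffset 128, 1.0f -> 138); Float32 data passes through unchanged.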
-
-    inputTensorInfo.SetQuantizationScale(qScale);
-    inputTensorInfo.SetQuantizationOffset(qOffset);
-    kernelTensorInfo.SetQuantizationScale(qScale);
-    kernelTensorInfo.SetQuantizationOffset(qOffset);
-    outputTensorInfo.SetQuantizationScale(qScale);
-    outputTensorInfo.SetQuantizationOffset(qOffset);
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo,
-                                  std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                    inputTensorInfo.GetQuantizationOffset(),
-                                                                    inputNoQuantizedValues)));
-    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
-                                  std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
-                                                                    kernelTensorInfo.GetQuantizationOffset(),
-                                                                    kernelNoQuantizedValues)));
-    auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
-                                           std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                             outputTensorInfo.GetQuantizationOffset(),
-                                                                             outputExpectedNoQuantizedValues)));
-
-    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            input,
-            kernel,
-            GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
-            expectedOutput,
-            qScale,
-            qOffset,
-            layout,
-            padLeft,
-            padTop,
-            padRight,
-            padBottom,
-            strideX,
-            strideY,
-            dilationX,
-            dilationY);
-}
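-
-// The dilation tests below size their expected outputs with the standard convolution
-// arithmetic. A minimal sketch of that arithmetic (the helper names are illustrative only,
-// not part of the test suite):
-namespace
-{
-
-constexpr uint32_t DilatedKernelSize(uint32_t kernelSize, uint32_t dilation)
-{
-    // Effective kernel extent after dilation: d * (K - 1) + 1.
-    return dilation * (kernelSize - 1) + 1;
-}
-
-constexpr uint32_t ConvOutputSize(uint32_t inputSize, uint32_t dilatedKernelSize,
-                                  uint32_t padFront, uint32_t padBack, uint32_t stride)
-{
-    // floor((I - K_dilated + P_front + P_back) / S) + 1; integer division performs the floor.
-    return (inputSize - dilatedKernelSize + padFront + padBack) / stride + 1;
-}
-
-// E.g. a 3x3 kernel at dilation 3 acts as 7x7 on a 10x10 input, giving a 4x4 output, and a
-// 2x2 kernel at dilation 2 with padding 1 and stride 3 also gives a 4x4 output.
-static_assert(ConvOutputSize(10, DilatedKernelSize(3, 3), 0, 0, 1) == 4, "3x3 kernel, dilation 3");
-static_assert(ConvOutputSize(10, DilatedKernelSize(2, 2), 1, 1, 3) == 4, "2x2 kernel, dilation 2");
-
-} // anonymous namespace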
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-    {
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-    };
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
-    std::vector<float> kernelNoQuantizedValues =
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9
-    };
-
-    // With a dilation of 3 the 3x3 kernel has an effective size of 7x7 (d(K-1)+1 = 3x2+1),
-    // so the output is 4x4: (I - K_dilated + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-    {
-        6., 5., 5., 5.,
-        6., 5., 5., 5.,
-        6., 5., 5., 5.,
-        3., 2., 2., 2.
-    };
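-
-    // E.g. the top-left output element sees the block of ones only through the dilated tap of
-    // weight 6 (input position (3, 6)), giving 6; for the next output the window shifts right
-    // by one and only the tap of weight 5 lands on a one (input (3, 4)), giving 5.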
-
-    // Pass the padding and stride defaults explicitly so that the trailing bool binds to the
-    // biasEnabled parameter rather than being implicitly converted to the uint32_t padLeft.
-    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            3,
-            3,
-            layout,
-            0, 0, 0, 0, // Padding.
-            1, 1,       // Stride.
-            biasEnabled);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-    {
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-    };
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
-    std::vector<float> kernelNoQuantizedValues =
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9
-    };
-
-    // With a dilation of 3 the 3x3 kernel has an effective size of 7x7, so the output is 4x4:
-    // (I - K_dilated + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4. The two input channels are summed
-    // into a single output channel, so each value is double that of the single-channel test above.
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-    {
-        12., 10., 10., 10.,
-        12., 10., 10., 10.,
-        12., 10., 10., 10.,
-         6.,  4.,  4.,  4.
-    };
-
-    // As above, pass the padding and stride defaults explicitly so that the trailing bool
-    // binds to the biasEnabled parameter rather than to padLeft.
-    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            3,
-            3,
-            layout,
-            0, 0, 0, 0, // Padding.
-            1, 1,       // Stride.
-            biasEnabled);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
-        armnn::IWorkloadFactory &workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-    {
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-    };
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
-    std::vector<float> kernelNoQuantizedValues =
-    {
-        1, 2,
-        3, 4
-    };
-
-    // With a dilation of 2 the 2x2 kernel has an effective size of 3x3: d(K-1)+1 = 2x(2-1)+1 = 3.
-    // The output is therefore 4x4: floor((I - K_dilated + padLeft + padRight)/S) + 1
-    // => floor((10 - 3 + 1 + 1)/3) + 1 = 4, with input I = 10, padding 1 on each side and
-    // stride S = 3. E.g. the top-left output element overlaps the input only at the
-    // bottom-right dilated tap, giving 1 * 4 = 4.
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-    {
-        4,  7,  7, 3,
-        6, 10, 10, 4,
-        6, 10, 10, 4,
-        2,  3,  3, 1
-    };
-    uint32_t padLeft = 1;
-    uint32_t padTop = 1;
-    uint32_t padRight = 1;
-    uint32_t padBottom = 1;
-
-    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            2,
-            2,
-            layout,
-            padLeft,
-            padTop,
-            padRight,
-            padBottom,
-            3,
-            3,
-            biasEnabled
-            );
-}
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-    armnn::IWorkloadFactory&,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-    bool,
-    armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
-Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-    armnn::IWorkloadFactory&,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-    bool,
-    armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-    armnn::IWorkloadFactory&,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-    bool,
-    armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-    armnn::IWorkloadFactory&,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-    bool,
-    armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
-Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-    armnn::IWorkloadFactory&,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-    bool,
-    armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-    armnn::IWorkloadFactory&,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-    bool,
-    armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-    armnn::IWorkloadFactory &workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
-Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-    armnn::IWorkloadFactory &workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-    armnn::IWorkloadFactory &workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    // Use a single-batch 2-channel 5x5 image as input.
-    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
-    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
-        {
-             0,  1,  2,  3,  4,
-             5,  6,  7,  8,  9,
-            10, 11, 12, 13, 14,
-            15, 16, 17, 18, 19,
-            20, 21, 22, 23, 24,
-
-            25, 26, 27, 28, 29,
-            30, 31, 32, 33, 34,
-            35, 36, 37, 38, 39,
-            40, 41, 42, 43, 44,
-            45, 46, 47, 48, 49
-        })));
-
-    // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
-    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
-    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
-        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
-        {
-            32, 31, 30, 29,
-            28, 27, 26, 25,
-            24, 23, 22, 21,
-            20, 19, 18, 17,
-
-            16, 15, 14, 13,
-            12, 11, 10,  9,
-             8,  7,  6,  5,
-             4,  3,  2,  1
-        })));
-
-    // Expected output is 1 batch of a 2-channel 5x5 image.
-    // Calculated using the python tensorflow library with strideX=1, strideY=1.
-    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-        {
-            1062, 1580, 1850, 1530, 1117,
-            2140, 3108, 3500, 2842, 2042,
-            3580, 5068, 5460, 4342, 3062,
-            3618, 5072, 5390, 4248, 2971,
-            3074, 4282, 4510, 3533, 2457,
-
-            1550, 2284, 2362, 1955, 1428,
-            2910, 4206, 4342, 3528, 2536,
-            3390, 4886, 5022, 4068, 2916,
-            3566, 5056, 5182, 4133, 2922,
-            3100, 4352, 4452, 3517, 2465
-        })));
-
-    return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout,
-        1,  // Padding left.
-        1,  // Padding top.
-        2,  // Padding right.
-        2,  // Padding bottom.
-        1,  // strideX
-        1); // strideY
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled)
-{
-    auto layout = armnn::DataLayout::NHWC;
-
-    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
-    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
-        {
-             0,  1,  2,  3,  4,
-             5,  6,  7,  8,  9,
-            10, 11, 12, 13, 14,
-            15, 16, 17, 18, 19,
-            20, 21, 22, 23, 24,
-
-            25, 26, 27, 28, 29,
-            30, 31, 32, 33, 34,
-            35, 36, 37, 38, 39,
-            40, 41, 42, 43, 44,
-            45, 46, 47, 48, 49
-        })));
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
-    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
-        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
-        {
-             32, 31, 30, 29,
-             28, 27, 26, 25,
-             24, 23, 22, 21,
-             20, 19, 18, 17,
-
-             16, 15, 14, 13,
-             12, 11, 10,  9,
-              8,  7,  6,  5,
-              4,  3,  2,  1
-        })));
-
-    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-        {
-            1062, 1580, 1850, 1530, 1117,
-            2140, 3108, 3500, 2842, 2042,
-            3580, 5068, 5460, 4342, 3062,
-            3618, 5072, 5390, 4248, 2971,
-            3074, 4282, 4510, 3533, 2457,
-
-            1550, 2284, 2362, 1955, 1428,
-            2910, 4206, 4342, 3528, 2536,
-            3390, 4886, 5022, 4068, 2916,
-            3566, 5056, 5182, 4133, 2922,
-            3100, 4352, 4452, 3517, 2465
-        })));
-
-    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout,
-        1,  // Padding left.
-        1,  // Padding top.
-        2,  // Padding right.
-        2,  // Padding bottom.
-        1,  // strideX
-        1);  // strideY
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
-         typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool biasEnabled)
-{
-    auto layout = armnn::DataLayout::NHWC;
-
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
-    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
-        {
-             0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 0, 0, 1, 1, 1, 0, 0, 0,
-             0, 0, 0, 1, 1, 1, 0, 0, 0,
-             0, 0, 0, 1, 1, 1, 0, 0, 0,
-             0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 0, 0, 0, 0, 0, 0, 0, 0
-        })));
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
-    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
-        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
-        {
-             1, 2, 3,
-             4, 5, 6,
-             7, 8, 9
-        })));
-
-    uint32_t padLeft = 0;
-    uint32_t padTop = 0;
-    uint32_t padRight = 0;
-    uint32_t padBottom = 0;
-    uint32_t strideX  = 1;
-    uint32_t strideY  = 1;
-    uint32_t dilationX  = 3;
-    uint32_t dilationY  = 3;
-
-    // With a dilation of 3 and no padding, the 3x3 kernel spans the whole 9x9 input, giving a
-    // 3x3 output. Only the centre tap (weight 5) ever lands on the 3x3 block of ones, so every
-    // output element is 5.
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
-    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
-        {
-             5, 5, 5,
-             5, 5, 5,
-             5, 5, 5
-        })));
-
-    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-        workloadFactory,
-        memoryManager,
-        input,
-        kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
-        expectedOutput,
-        qScale,
-        qOffset,
-        layout,
-        padLeft,
-        padTop,
-        padRight,
-        padBottom,
-        strideX,
-        strideY,
-        dilationX,
-        dilationY);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const std::vector<float>& inputNoQuantizedValues,
-        armnn::TensorInfo& inputTensorInfo,
-        const std::vector<float>& kernelNoQuantizedValues,
-        armnn::TensorInfo& kernelTensorInfo,
-        const std::vector<float>& outputExpectedNoQuantizedValues,
-        armnn::TensorInfo& outputTensorInfo,
-        uint32_t dilationX,
-        uint32_t dilationY,
-        armnn::DataLayout layout = armnn::DataLayout::NCHW,
-        bool biasEnabled = false)
-{
-    float qScale;
-    int32_t qOffset;
-    switch (ArmnnType)
-    {
-        case armnn::DataType::QuantisedAsymm8:
-        {
-            qScale = 0.1f;
-            qOffset = 128;
-            break;
-        }
-        case armnn::DataType::QuantisedSymm16:
-        {
-            qScale = 0.1f;
-            qOffset = 0;
-            break;
-        }
-        case armnn::DataType::Float32:
-        default:
-        {
-            qScale = 0.f;
-            qOffset = 0;
-            break;
-        }
-    }
-
-    inputTensorInfo.SetQuantizationScale(qScale);
-    inputTensorInfo.SetQuantizationOffset(qOffset);
-    kernelTensorInfo.SetQuantizationScale(qScale);
-    kernelTensorInfo.SetQuantizationOffset(qOffset);
-    outputTensorInfo.SetQuantizationScale(qScale);
-    outputTensorInfo.SetQuantizationOffset(qOffset);
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo,
-                                  std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                    inputTensorInfo.GetQuantizationOffset(),
-                                                                    inputNoQuantizedValues)));
-    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
-                                   std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
-                                                                     kernelTensorInfo.GetQuantizationOffset(),
-                                                                     kernelNoQuantizedValues)));
-    auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
-                                           std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                             outputTensorInfo.GetQuantizationOffset(),
-                                                                             outputExpectedNoQuantizedValues)));
-
-    uint32_t padLeft = 0;
-    uint32_t padTop = 0;
-    uint32_t padRight = 0;
-    uint32_t padBottom = 0;
-    uint32_t strideX  = 1;
-    uint32_t strideY  = 1;
-
-    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            input,
-            kernel,
-            GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
-            expectedOutput,
-            qScale,
-            qOffset,
-            layout,
-            padLeft,
-            padTop,
-            padRight,
-            padBottom,
-            strideX,
-            strideY,
-            dilationX,
-            dilationY);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-            {
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-            };
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
-    std::vector<float> kernelNoQuantizedValues =
-            {
-                    1, 2, 3,
-                    4, 5, 6,
-                    7, 8, 9
-            };
-
-    // With a dilation of 3 the 3x3 kernel has an effective size of 7x7,
-    // so the output is 4x4: (I - K_dilated + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-            {
-                    6., 5., 5., 5.,
-                    6., 5., 5., 5.,
-                    6., 5., 5., 5.,
-                    3., 2., 2., 2.
-            };
-
-    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            3,
-            3,
-            layout,
-            biasEnabled);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-            {
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-            };
-
-    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
-    std::vector<float> kernelNoQuantizedValues =
-            {
-                    1, 2, 3,
-                    4, 5, 6,
-                    7, 8, 9,
-
-                    1, 2, 3,
-                    4, 5, 6,
-                    7, 8, 9
-            };
-
-    // With a dilation of 3 the 3x3 kernel has an effective size of 7x7, so the output is
-    // 2x4x4: (I - K_dilated + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4, with each input channel
-    // convolved independently (depth multiplier 1).
-    armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-            {
-                    6., 5., 5., 5.,
-                    6., 5., 5., 5.,
-                    6., 5., 5., 5.,
-                    3., 2., 2., 2.,
-
-                    6., 5., 5., 5.,
-                    6., 5., 5., 5.,
-                    6., 5., 5., 5.,
-                    3., 2., 2., 2.
-            };
-
-    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            3,
-            3,
-            layout,
-            biasEnabled);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
-            armnn::IWorkloadFactory& workloadFactory,
-            const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-            bool biasEnabled,
-            const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-            {
-                    10.0, 10.0, 10.0,
-                    10.0, 10.0, 10.0,
-                    10.0, 10.0, 10.0,
-
-                    21.0, 22.0, 23.0,
-                    24.0, 25.0, 26.0,
-                    27.0, 28.0, 29.0
-            };
-
-    armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
-
-    std::vector<float> kernelNoQuantizedValues =
-            {
-                    0.25f, 0.25f,
-                    0.25f, 0.25f,
-
-                    0.25f, 0.25f,
-                    0.25f, 0.25f,
-
-                    0.0f , 0.0f,
-                    0.0f , 0.1f,
-
-                    0.0f , 0.0f,
-                    0.0f , 0.1f,
-
-                    0.2f , 0.0f,
-                    0.0f , 0.0f,
-
-                    0.2f , 0.0f,
-                    0.0f , 0.0f,
-
-                    0.0f , 0.3f,
-                    0.0f , 0.0f,
-
-                    0.0f , 0.3f,
-                    0.0f , 0.0f
-            };
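-
-    // The kernel shape is [M, C, H, W] = [4, 2, 2, 2]: a depth multiplier of 4 applied to each
-    // of the 2 input channels, with the 2x2 blocks above ordered m0c0, m0c1, m1c0, m1c1, ...
-    // The output therefore has M * C = 8 channels, grouped by input channel first.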
-
-    armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-            {
-                    10.f, 10.f,
-                    10.f, 10.f,
-
-                    1.f, 1.f,
-                    1.f, 1.f,
-
-                    2.f, 2.f,
-                    2.f, 2.f,
-
-                    3.f, 3.f,
-                    3.f, 3.f,
-
-                    23.f, 24.f,
-                    26.f, 27.f,
-
-                    2.5f, 2.6000001f,
-                    2.8f, 2.9f,
-
-                    4.2000003f, 4.4f,
-                    4.8f, 5.f,
-
-                    6.6000004f, 6.9f,
-                    7.5000005f, 7.8f
-            };
-
-    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            1,
-            1,
-            layout,
-            biasEnabled);
-}
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
-LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
-            armnn::IWorkloadFactory& workloadFactory,
-            const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-            bool biasEnabled,
-            const armnn::DataLayout layout)
-{
-    armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
-    std::vector<float> inputNoQuantizedValues =
-            {
-                    10.0, 10.0, 10.0,
-                    10.0, 10.0, 10.0,
-                    10.0, 10.0, 10.0,
-
-                    21.0, 22.0, 23.0,
-                    24.0, 25.0, 26.0,
-                    27.0, 28.0, 29.0
-            };
-
-    armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
-
-    std::vector<float> kernelNoQuantizedValues =
-            {
-                    0.25f, 0.25f,
-                    0.25f, 0.25f,
-
-                    0.2f , 0.0f,
-                    0.0f , 0.0f,
-
-                    0.0f , 0.0f,
-                    0.0f , 0.1f,
-
-                    0.0f , 0.3f,
-                    0.0f , 0.0f
-            };
-
-    armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
-    std::vector<float> outputExpectedNoQuantizedValues =
-            {
-                    10.f, 10.f,
-                    10.f, 10.f,
-
-                    1.f, 1.f,
-                    1.f, 1.f,
-
-                    4.2000003f, 4.4f,
-                    4.8f, 5.f,
-
-                    6.6000004f, 6.9f,
-                    7.5000005f, 7.8f
-            };
-
-    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
-            workloadFactory,
-            memoryManager,
-            inputNoQuantizedValues,
-            inputTensorInfo,
-            kernelNoQuantizedValues,
-            kernelTensorInfo,
-            outputExpectedNoQuantizedValues,
-            outputTensorInfo,
-            1,
-            1,
-            layout,
-            biasEnabled);
-}
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-        armnn::IWorkloadFactory&,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-        bool,
-        armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
-DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-        armnn::IWorkloadFactory&,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-        bool,
-        armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-        armnn::IWorkloadFactory&,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-        bool,
-        armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-        armnn::IWorkloadFactory&,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-        bool,
-        armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
-DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-        armnn::IWorkloadFactory&,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-        bool,
-        armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-        armnn::IWorkloadFactory&,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
-        bool,
-        armnn::DataLayout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-        armnn::IWorkloadFactory &workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout);
-
-template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
-DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
-        armnn::IWorkloadFactory &workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> DepthwiseConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
-}
-
-LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled)
-{
-    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
-}
-
-LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
-}
-
-LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
-    auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
-
-    std::vector<float> kernelData;
-    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
-    for (unsigned int i = 0; i < 64; ++i)
-    {
-        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
-    }
-    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
-    auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
-
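-    // Each of the 64 depth slices applies the same kernel to the single input channel:
-    // 1*1 + (-1)*2 + (-1)*3 + 1*4 = 0, so every output element is expected to be zero.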
-    std::vector<float> expectedOutputData(64, 0.f);
-    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
-    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
-
-    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
-            workloadFactory,
-            memoryManager,
-            input,
-            kernel,
-            boost::multi_array<float, 1>(),
-            expectedOutput,
-            0.f,
-            0,
-            armnn::DataLayout::NCHW);
-}
-
-LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
-}
-
-LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
-            workloadFactory,
-            memoryManager,
-            0.f,
-            0,
-            false);
-}
-
-LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
-                armnn::IWorkloadFactory& workloadFactory,
-                const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-                bool biasEnabled,
-                const armnn::DataLayout layout)
-{
-    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
-        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
-}
-
-LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    const armnn::DataLayout layout)
-{
-    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, refWorkloadFactory, layout);
-}
-
-LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    const armnn::DataLayout layout)
-{
-    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, refWorkloadFactory, layout);
-}
-
-LayerTestResult<float,4> SimpleNormalizationAcrossTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
-    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
-    return SimpleNormalizationTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
-}
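-
-// These normalization tests exercise local response normalisation which, in its usual form,
-// computes x / (kappa + alpha * sum(x_i^2))^beta, with the sum taken across channels (Across)
-// or over a spatial neighbourhood around each element (Within).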
-
-LayerTestResult<float,4> SimpleNormalizationWithinTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
-    auto normChannel = armnn::NormalizationAlgorithmChannel::Within;
-    return SimpleNormalizationTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
-}
-
-LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
-    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
-    return SimpleNormalizationNhwcTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
-}
-
-LayerTestResult<float,2> SimpleSoftmaxTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta)
-{
-    return SimpleSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta);
-}
-
-LayerTestResult<float,2> SimpleAxisSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis)
-{
-    return SimpleSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, axis);
-}
-
-LayerTestResult<float,3> Simple3dSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    Simple3dSoftmaxOutputData data;
-    return Simple3dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta,
-                                                             data.inputShape, data.outputData, data.inputData);
-}
-
-LayerTestResult<float,3> Simple3dAxisSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis)
-{
-    armnn::TensorShape inputShape;
-    std::vector<float> inputData;
-    std::vector<float> outputData;
-    switch (axis)
-    {
-    case -3:
-    case 0:
-        {
-            inputShape = {5, 2, 2};
-
-            inputData =
-                    {
-                            17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
-
-                            15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f
-                    };
-
-            outputData =
-                    {
-                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
-                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
-                            0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
-                            0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
-                            7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f
-                    };
-            break;
-        }
-    case -2:
-    case 1:
-        {
-            inputShape = {2, 5, 2};
-
-            inputData =
-                    {
-                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
-
-                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
-                    };
-
-            outputData =
-                    {
-                            0.643914213228014f,     0.643914213228014f,
-                            0.236882800924671f,     0.236882800924671f,
-                            0.087144312427294f,     0.087144312427294f,
-                            0.032058600957022f,     0.032058600957022f,
-                            7.246299848982885e-08f, 7.246299848982885e-08f,
-
-                            0.643914213228014f,     0.643914213228014f,
-                            0.236882800924671f,     0.236882800924671f,
-                            0.087144312427294f,     0.087144312427294f,
-                            0.032058600957022f,     0.032058600957022f,
-                            7.246299848982885e-08f, 7.246299848982885e-08f
-                    };
-        break;
-        }
-    case -1:
-    case 2:
-        {
-            inputShape = {2, 2, 5};
-
-            inputData =
-                    {
-                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
-                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
-                    };
-
-            outputData =
-                    {
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f
-                    };
-            break;
-        }
-    }
-
-    return Simple3dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta,
-                                                             inputShape, outputData, inputData, axis);
-}
-
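All of the expected axis-softmax values above reduce to softmax over the five distinct logits {17, 16, 15, 14, 1}; the negative positions are the same logits shifted by a constant offset, which softmax ignores. A minimal standalone sketch, not part of this patch, that reproduces the constants (assuming beta == 1.0f):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // The five distinct logits used by the axis-softmax tests above.
        const std::vector<double> logits = { 17.0, 16.0, 15.0, 14.0, 1.0 };

        double sum = 0.0;
        for (double x : logits) { sum += std::exp(x); }

        // Prints, to about 15 significant digits, 0.643914213228014,
        // 0.236882800924671, 0.087144312427294, 0.032058600957022 and
        // 7.246299848982885e-08 -- the outputData constants above.
        for (double x : logits)
        {
            std::printf("%.15g\n", std::exp(x) / sum);
        }
        return 0;
    }
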
-LayerTestResult<float,4> Simple4dSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    Simple4dSoftmaxData data;
-    return Simple4dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, data.inputShape,
-                                                             data.outputData, data.inputData);
-}
-
-LayerTestResult<float,4> Simple4dAxisSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis)
-{
-    armnn::TensorShape inputShape;
-    std::vector<float> inputData;
-    std::vector<float> outputData;
-    switch (axis)
-    {
-    case -4:
-    case 0:
-        {
-            inputShape = {5, 2, 2, 2};
-
-            inputData =
-                    {
-                            17.0f, -1.0f, 17.0f, -1.0f, 17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f,
-                            16.0f, -2.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f, 15.0f, -3.0f,
-                            15.0f, -3.0f, 15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 14.0f, -4.0f,
-                            14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f, 1.0f, -17.0f, 1.0f, -17.0f
-                    };
-
-            outputData =
-                    {
-                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
-                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
-
-                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
-                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
-
-                            0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
-                            0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
-
-                            0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
-                            0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
-
-                            7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
-                            7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f
-                    };
-            break;
-        }
-    case -3:
-    case 1:
-        {
-            inputShape = {2, 5, 2, 2};
-
-            inputData =
-                    {
-                            17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
-                            15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f,
-                            17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
-                            15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f
-                    };
-
-            outputData =
-                    {
-                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
-                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
-                            0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
-                            0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
-                            7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
-
-                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
-                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
-                            0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
-                            0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
-                            7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f
-                    };
-            break;
-        }
-    case -2:
-    case 2:
-        {
-        inputShape = {2, 2, 5, 2};
-
-        inputData =
-                {
-                        17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
-                        17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
-                        17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
-                        17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
-                };
-
-        outputData =
-                {
-                        0.643914213228014f,     0.643914213228014f,
-                        0.236882800924671f,     0.236882800924671f,
-                        0.087144312427294f,     0.087144312427294f,
-                        0.032058600957022f,     0.032058600957022f,
-                        7.246299848982885e-08f, 7.246299848982885e-08f,
-
-                        0.643914213228014f,     0.643914213228014f,
-                        0.236882800924671f,     0.236882800924671f,
-                        0.087144312427294f,     0.087144312427294f,
-                        0.032058600957022f,     0.032058600957022f,
-                        7.246299848982885e-08f, 7.246299848982885e-08f,
-
-                        0.643914213228014f,     0.643914213228014f,
-                        0.236882800924671f,     0.236882800924671f,
-                        0.087144312427294f,     0.087144312427294f,
-                        0.032058600957022f,     0.032058600957022f,
-                        7.246299848982885e-08f, 7.246299848982885e-08f,
-
-                        0.643914213228014f,     0.643914213228014f,
-                        0.236882800924671f,     0.236882800924671f,
-                        0.087144312427294f,     0.087144312427294f,
-                        0.032058600957022f,     0.032058600957022f,
-                        7.246299848982885e-08f, 7.246299848982885e-08f
-                };
-        break;
-        }
-    case -1:
-    case 3:
-        {
-            inputShape = {2, 2, 2, 5};
-
-            inputData =
-                    {
-                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
-                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
-                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
-                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
-                    };
-
-            outputData =
-                    {
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f,
-                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                            7.246299848982885e-08f
-                    };
-            break;
-        }
-    }
-
-    return Simple4dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, inputShape,
-                                                             outputData, inputData, axis);
-}
-
-LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta)
-{
-    return SimpleSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta);
-}
-
-LayerTestResult<uint8_t,3> Simple3dSoftmaxUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    Simple3dSoftmaxOutputData data;
-    return Simple3dSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta,
-                                                                     data.inputShape, data.outputData, data.inputData);
-}
-
-LayerTestResult<uint8_t,4> Simple4dSoftmaxUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    Simple4dSoftmaxData data;
-
-    return Simple4dSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta,
-                                                                     data.inputShape, data.outputData, data.inputData);
-}
-
-LayerTestResult<int16_t,2> SimpleSoftmaxUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    return SimpleSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta);
-}
-
-LayerTestResult<int16_t,3> Simple3dSoftmaxUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    Simple3dSoftmaxOutputData data;
-    return Simple3dSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta,
-                                                                     data.inputShape, data.outputData, data.inputData);
-}
-
-LayerTestResult<int16_t,4> Simple4dSoftmaxUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta)
-{
-    Simple4dSoftmaxData data;
-
-    return Simple4dSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta,
-                                                                     data.inputShape, data.outputData, data.inputData);
-}
-
-LayerTestResult<float,4> CompareNormalizationTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::NormalizationAlgorithmChannel normChannel,
-    armnn::NormalizationAlgorithmMethod normMethod)
-{
-    return CompareNormalizationTestImpl(workloadFactory, memoryManager, refWorkloadFactory, normChannel, normMethod);
-}
-
-LayerTestResult<float,2> CompareSoftmaxTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    float beta)
-{
-    return CompareSoftmaxTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, refWorkloadFactory, beta);
-}
-
-LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    float beta)
-{
-    return CompareSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, refWorkloadFactory, beta);
-}
-
-std::vector<LayerTestResult<float,3>> SplitterTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SplitterTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SplitterTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-std::vector<LayerTestResult<int16_t,3>> SplitterInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SplitterTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-LayerTestResult<float, 3> CopyViaSplitterTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return CopyViaSplitterTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return CopyViaSplitterTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return CopyViaSplitterTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-#if defined(ARMNNREF_ENABLED)
-
-// The LSTM unit tests are currently run only for the reference backend.
-
-void LstmUtilsZeroVectorTest()
-{
-    armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32);
-    boost::multi_array<float, 1> input = MakeTensor<float, 1>(inputDesc, std::vector<float>(
-            {2., 3., 3., 4.}));
-
-    boost::multi_array<float, 1> expectedOutput = MakeTensor<float, 1>(inputDesc, std::vector<float>(
-            {0., 0., 0., 0.}));
-
-    return LstmUtilsZeroVectorTestImpl<armnn::DataType::Float32>(input, 4, expectedOutput);
-}
-
-void LstmUtilsMeanStddevNormalizationNoneZeroInputTest()
-{
-    uint32_t batchSize = 2;
-    uint32_t vecSize = 4;
-    armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            { 0.1f, 0.2f, 0.3f, 0.4f,      //batch 0
-              0.9f, 1.0f, 1.1f, 1.2f }));  //batch 1
-
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            { -1.34164071f, -0.447213531f, 0.44721365f,  1.34164071f,      //batch 0
-              -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f  }));  //batch 1
-
-    return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
-            vecSize, batchSize, expectedOutput);
-}
-
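The expected values here are simply the per-row standardisation (x - mean(row)) / sqrt(var(row)). A standalone sketch, not part of this patch, reproducing batch 0 (the float32 implementation rounds the two batches slightly differently, hence the near-identical constants above):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const std::vector<double> row = { 0.1, 0.2, 0.3, 0.4 }; // batch 0 above

        double mean = 0.0;
        for (double x : row) { mean += x; }
        mean /= static_cast<double>(row.size());

        double variance = 0.0;
        for (double x : row) { variance += (x - mean) * (x - mean); }
        variance /= static_cast<double>(row.size());

        // Prints approximately -1.341641, -0.447214, 0.447214, 1.341641.
        for (double x : row)
        {
            std::printf("%f\n", (x - mean) / std::sqrt(variance));
        }
        return 0;
    }
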
-void LstmUtilsMeanStddevNormalizationAllZeroInputTest()
-{
-    uint32_t batchSize = 2;
-    uint32_t vecSize = 4;
-    armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
-              0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
-
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
-              0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
-
-    return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
-            vecSize, batchSize, expectedOutput);
-}
-
-void LstmUtilsMeanStddevNormalizationMixedZeroInputTest()
-{
-    uint32_t batchSize = 2;
-    uint32_t vecSize = 4;
-    armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
-              0.1f, 0.2f, 0.3f, 0.4f }));  //batch 1
-
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            {         0.0f,          0.0f,        0.0f,        0.0f,      //batch 0
-              -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f }));  //batch 1
-
-    return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
-            vecSize, batchSize, expectedOutput);
-}
-
-
-void LstmUtilsVectorBatchVectorCwiseProductTest()
-{
-    uint32_t batchSize = 4;
-    uint32_t vecSize = 29;
-    armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
-            {   1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f, 10.1f,
-              11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f,
-              21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,     0.0f}));
-
-    armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
-            { /* batch 0 */
-                1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f,  10.1f,
-              11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f,  20.2f,
-              21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,   0.0f,
-              /* batch 1 */
-                -1.1f,   -2.2f,   -3.3f,   -4.4f,   -5.5f,   -6.6f,   -7.7f,   -8.8f,   -9.9f, -10.1f,
-              -11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f,
-              -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f,    0.0f,
-              /* batch 2 */
-                1.1f,   -2.2f,   3.3f,   -4.4f,   5.5f,   -6.6f,   7.7f,   -8.8f,   9.9f, -10.1f,
-              11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f,
-              21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f,   0.0f,
-              /* batch 3 */
-                -1.1f,   2.2f,   -3.3f,   4.4f,   -5.5f,   6.6f,   -7.7f,   8.8f,   -9.9f, 10.1f,
-              -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f,
-              -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f,    0.0f}));
-
-    // Expected output: the element-wise product of vector and each batch row (no accumulation).
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
-            { /* batch 0 */
-                 1.210000f,    4.840000f,   10.889999f,   19.360001f,   30.250000f,   43.559998f,
-                59.289997f,   77.440002f,   98.009995f,  102.010010f,  123.432091f,  146.894394f,
-               172.396896f,  199.939606f,  229.522491f,  261.145599f,  294.808899f,  330.512421f,
-               368.256134f,  408.040039f,  449.864075f,  493.728363f,  539.632874f,  587.577576f,
-               637.562500f,  689.587585f,  743.652954f,  799.758423f,    0.000000f,
-              /* batch 1 */
-                -1.210000f,   -4.840000f,  -10.889999f,  -19.360001f,  -30.250000f,  -43.559998f,
-               -59.289997f,  -77.440002f,  -98.009995f, -102.010010f, -123.432091f, -146.894394f,
-              -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f,
-              -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f,
-              -637.562500f, -689.587585f, -743.652954f, -799.758423f,    0.000000f,
-              /* batch 2 */
-                 1.210000f,   -4.840000f,  10.889999f,   -19.360001f,   30.250000f,  -43.559998f,
-                59.289997f,  -77.440002f,  98.009995f,  -102.010010f,  123.432091f, -146.894394f,
-               172.396896f, -199.939606f, 229.522491f,  -261.145599f,  294.808899f, -330.512421f,
-               368.256134f, -408.040039f, 449.864075f,  -493.728363f,  539.632874f, -587.577576f,
-               637.562500f, -689.587585f, 743.652954f,  -799.758423f,    0.000000f,
-              /* batch 3 */
-                -1.210000f,    4.840000f,  -10.889999f,   19.360001f,  -30.250000f,   43.559998f,
-               -59.289997f,   77.440002f,  -98.009995f,  102.010010f, -123.432091f,  146.894394f,
-              -172.396896f,  199.939606f, -229.522491f,  261.145599f, -294.808899f,  330.512421f,
-              -368.256134f,  408.040039f, -449.864075f,  493.728363f, -539.632874f,  587.577576f,
-              -637.562500f,  689.587585f, -743.652954f,  799.758423f,    0.000000f}));
-
-    return LstmUtilsVectorBatchVectorCwiseProductTestImpl<armnn::DataType::Float32>(vector, batchVector,
-            vecSize, batchSize, expectedOutput);
-}
-
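A quick standalone check, not part of this patch, that the expected output really is the plain element-wise product of vector with each batch row:

    #include <cstdio>

    int main()
    {
        const float vec[4]      = { 1.1f, 2.2f, 3.3f, 4.4f };     // first entries of vector above
        const float batchRow[4] = { -1.1f, -2.2f, -3.3f, -4.4f }; // first entries of batch 1
        for (int i = 0; i < 4; ++i)
        {
            // Prints approximately -1.21, -4.84, -10.89, -19.36 -- batch 1 of expectedOutput.
            std::printf("%f\n", vec[i] * batchRow[i]);
        }
        return 0;
    }
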
-
-void LstmUtilsVectorBatchVectorAddTest()
-{
-    uint32_t batchSize = 2;
-    uint32_t vecSize = 3;
-    armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
-            { 0.0f, -0.5f, 1.0f}));
-
-    armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
-            { 1.0f, 2.0f, 3.0f,    //batch 0
-              4.0f, 5.0f, 6.0f})); //batch 1
-
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
-            { 1.0f, 1.5f, 4.0f,
-              4.0f, 4.5f, 7.0f}));
-
-    return LstmUtilsVectorBatchVectorAddTestImpl<armnn::DataType::Float32>(vector, batchVector,
-            vecSize, batchSize, expectedOutput);
-}
-
-#endif
-
-LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputDesc({ 2, 2 }, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            { 2., 3., 3., 4. }));
-
-    armnn::TensorInfo outputDesc({ 2, 4 }, armnn::DataType::Float32);
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
-            {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
-             -0.42734814f, -0.00478661f,  0.13455015f, -0.03560682f}));
-    return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, input, expectedOutput);
-}
-
-LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
-             0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}));
-
-    armnn::TensorInfo outputDesc({ 2, 16 }, armnn::DataType::Float32);
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
-            {-0.00396806f,  0.029352f,   -0.00279226f, 0.0159977f,  -0.00835576f,
-             -0.0211779f,   0.0283512f,  -0.0114597f,  0.00907307f, -0.0244004f,
-             -0.0152191f,  -0.0259063f,   0.00914318f, 0.00415118f,  0.017147f,
-              0.0134203f,  -0.013869f,    0.0287268f, -0.00334693f,  0.00733398f, -0.0287926f,
-             -0.0186926f,   0.0193662f,  -0.0115437f,  0.00422612f, -0.0345232f,
-              0.00223253f, -0.00957321f,  0.0210624f,  0.013331f,    0.0150954f,   0.02168f}));
-    return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, input, expectedOutput);
-}
-
-LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            {2., 3., 3., 4.}));
-
-
-    armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::Float32);
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
-            {{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
-              -0.0185422f,   0.11281417f,  0.24466537f, -0.1826292f}}));
-
-    return LstmNoCifgNoPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, input, expectedOutput);
-}
-
-
-LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
-    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
-            {0.7f, 0.8f, 0.1f, 0.2f, 0.3f,     //batch 0
-             0.3f, 0.2f, 0.9f, 0.8f, 0.1f}));  //batch 1
-
-    armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32);
-    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
-            {  0.0244077f,  0.128027f, -0.00170918f,    //batch 0
-             -0.00692428f, 0.0848741f,    0.063445f})); //batch 1
-    return LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, input, expectedOutput);
-}
-
-
-LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const float qScale = 1.0f;
-    const int32_t qOffset = 0;
-
-    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
-    const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
-
-    armnn::TensorInfo inputDesc({2, 2}, datatype);
-    boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale, qOffset,
-            std::vector<float>{2., 3., 3., 4.}));
-
-    armnn::TensorInfo outputDesc({2, 4}, datatype);
-    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
-            qOffset, std::vector<float>({{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
-                                          -0.0185422f,  0.11281417f,  0.24466537f, -0.1826292f}})));
-
-    return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
-        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
-}
-
-LayerTestResult<int16_t, 2> LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const float qScale = 1.0f;
-    const int32_t qOffset = 0;
-
-    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
-    const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
-
-    armnn::TensorInfo inputDesc({ 2, 2 }, datatype);
-    boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale, qOffset,
-            std::vector<float>({ 2., 3., 3., 4. })));
-
-    armnn::TensorInfo outputDesc({ 2, 4 }, datatype);
-    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
-            qOffset, std::vector<float>(
-            {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
-             -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f})));
-
-    return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<datatype>(
-        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
-}
-
-LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const float qScale = 2.0f;
-    const int32_t qOffset = 0;
-
-    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
-    const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
-
-    armnn::TensorInfo inputDesc({ 2, 5 }, datatype);
-    boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale,
-            qOffset, std::vector<float>(
-            {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
-             0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f})));
-
-    armnn::TensorInfo outputDesc({ 2, 16 }, datatype);
-    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
-            qOffset, std::vector<float>(
-            {-0.00396806f,  0.029352f,   -0.00279226f, 0.0159977f,  -0.00835576f,
-             -0.0211779f,   0.0283512f,  -0.0114597f,  0.00907307f, -0.0244004f,
-             -0.0152191f,  -0.0259063f,   0.00914318f, 0.00415118f,  0.017147f,
-              0.0134203f,  -0.013869f,    0.0287268f, -0.00334693f,  0.00733398f, -0.0287926f,
-             -0.0186926f,   0.0193662f,  -0.0115437f,  0.00422612f, -0.0345232f,
-              0.00223253f, -0.00957321f,  0.0210624f,  0.013331f,    0.0150954f,   0.02168f})));
-
-    return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<datatype>(
-        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
-}
-
-LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const float qScale = 1.0f;
-    const int32_t qOffset = 0;
-
-    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16; // datatype & constants set to QSymm16
-
-    armnn::TensorInfo inputDesc({2, 2}, datatype);
-    boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale,
-            qOffset, std::vector<float>{2., 3., 3., 4.}));
-
-    armnn::TensorInfo outputDesc({2, 4}, datatype);
-    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
-            qOffset, std::vector<float>({{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
-                                          -0.0185422f,  0.11281417f,  0.24466537f, -0.1826292f}})));
-
-    return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
-        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, datatype);
-}
-
-// QuantizedLstm
-LayerTestResult<uint8_t, 2> QuantizedLstmTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::QuantisedAsymm8);
-    boost::multi_array<uint8_t, 2> input = MakeTensor<uint8_t, 2>(inputDesc, std::vector<uint8_t>(
-        {166, 179, 50, 150}));
-
-    armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QuantisedAsymm8);
-    boost::multi_array<uint8_t, 2> expectedOutput = MakeTensor<uint8_t, 2>(outputDesc, std::vector<uint8_t>(
-        {140, 151, 146, 112, 136, 156, 142, 112 }));
-
-    return QuantizedLstmTestImpl(workloadFactory, memoryManager, input, expectedOutput);
-}
-
-LayerTestResult<float,3> ConcatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    unsigned int outputWidth = 3;
-    unsigned int outputHeight = 6;
-    unsigned int outputChannels = 3;
-
-    unsigned int inputWidth1 = 3;
-    unsigned int inputHeight1 = 6;
-    unsigned int inputChannels1 = 2;
-
-    unsigned int inputWidth2 = 3;
-    unsigned int inputHeight2 = 6;
-    unsigned int inputChannels2 = 1;
-
-    // Define the tensor descriptors.
-    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
-    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
-    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);
-
-    LayerTestResult<float,3> ret(outputTensorInfo);
-
-    ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
-    {
-            1.0f, 2.0f, 3.0f,
-            4.0f, 5.0f, 6.0f,
-            7.0f, 8.0f, 9.0f,
-            10.0f, 11.0f, 12.0f,
-            13.0f, 14.0f, 15.0f,
-            16.0f, 17.0f, 18.0f,
-
-            19.0f, 20.0f, 21.0f,
-            22.0f, 23.0f, 24.0f,
-            25.0f, 26.0f, 27.0f,
-            28.0f, 29.0f, 30.0f,
-            31.0f, 32.0f, 33.0f,
-            34.0f, 35.0f, 36.0f,
-
-            37.0f, 38.0f, 39.0f,
-            40.0f, 41.0f, 42.0f,
-            43.0f, 44.0f, 45.0f,
-            46.0f, 47.0f, 48.0f,
-            49.0f, 50.0f, 51.0f,
-            52.0f, 53.0f, 54.0f,
-        })
-    );
-
-    auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
-        {
-            1.0f, 2.0f, 3.0f,
-            4.0f, 5.0f, 6.0f,
-            7.0f, 8.0f, 9.0f,
-            10.0f, 11.0f, 12.0f,
-            13.0f, 14.0f, 15.0f,
-            16.0f, 17.0f, 18.0f,
-
-            19.0f, 20.0f, 21.0f,
-            22.0f, 23.0f, 24.0f,
-            25.0f, 26.0f, 27.0f,
-            28.0f, 29.0f, 30.0f,
-            31.0f, 32.0f, 33.0f,
-            34.0f, 35.0f, 36.0f,
-        })
-    );
-
-    auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
-        {
-            37.0f, 38.0f, 39.0f,
-            40.0f, 41.0f, 42.0f,
-            43.0f, 44.0f, 45.0f,
-            46.0f, 47.0f, 48.0f,
-            49.0f, 50.0f, 51.0f,
-            52.0f, 53.0f, 54.0f,
-        })
-    );
-
-    std::vector<unsigned int> wOrigin1 = {0, 0, 0}; // Extent of the window is defined by the size of input[0].
-    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
-
-    std::vector<unsigned int> wOrigin2 = {2, 0, 0}; // Extent of the window is defined by the size of input[1].
-    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
-        subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo1);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2  =
-        subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo2);
-
-    armnn::ConcatQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_ViewOrigins.push_back(window1);
-    data.m_ViewOrigins.push_back(window2);
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
-
-    inputHandle1->Allocate();
-    inputHandle2->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
-
-    return ret;
-}
-
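The view origins above determine where each input window lands in the concatenated output: input1 fills channels [0, 2) and input2 fills channel [2, 3). A small sketch, not part of this patch, computing the flattened output offset implied by wOrigin2 = {2, 0, 0}:

    #include <cstdio>

    int main()
    {
        const unsigned int outH = 6, outW = 3;       // output shape is { 3, 6, 3 } (C, H, W)
        const unsigned int origin2[3] = { 2, 0, 0 }; // wOrigin2: input2 starts at channel 2

        // Flattened offset of input2's first element (the value 37.0f) in the
        // output: 2 * 6 * 3 + 0 * 3 + 0 = 36, i.e. the 37th element -- exactly
        // where 37.0f appears in outputExpected above.
        const unsigned int offset = origin2[0] * outH * outW + origin2[1] * outW + origin2[2];
        std::printf("input2 begins at flattened output index %u\n", offset);
        return 0;
    }
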
-LayerTestResult<float,4> CompareBatchNormTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory)
-{
-    const unsigned int width     = 2;
-    const unsigned int height    = 3;
-    const unsigned int channels  = 5;
-    const unsigned int batchSize = 3;
-
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-    armnn::TensorInfo tensorInfo;
-
-    constexpr unsigned int shape[]       = {batchSize, channels, height, width};
-    constexpr unsigned int tensorShape[] = {channels};
-
-    inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
-    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
-    tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32);
-
-    auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312);
-
-    auto mean     = MakeRandomTensor<float, 1>(tensorInfo, 123);
-    auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f);
-    auto beta     = MakeRandomTensor<float, 1>(tensorInfo, 123);
-    auto gamma    = MakeRandomTensor<float, 1>(tensorInfo, 345);
-
-    LayerTestResult<float,4> ret(outputTensorInfo);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::BatchNormalizationQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
-    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
-
-    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
-    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
-    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
-    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Mean             = &meanTensor;
-    data.m_Variance         = &varianceTensor;
-    data.m_Beta             = &betaTensor;
-    data.m_Gamma            = &gammaTensor;
-    data.m_Parameters.m_Eps = 0.01f;
-
-    armnn::BatchNormalizationQueueDescriptor refData = data;
-    armnn::WorkloadInfo refInfo = info;
-    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
-    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
-    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    inputHandleRef->Allocate();
-    outputHandleRef->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-    workloadRef->PostAllocationConfigure();
-    workloadRef->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
-
-    return ret;
-}
-
-template<typename T>
-void PermuteTensorData(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::PermutationVector& mappings,
-        armnn::TensorInfo & inputTensorInfo,
-        const T * inputData,
-        std::vector<T>& outputData)
-{
-    BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
-    if (inputData == nullptr)
-    {
-        // Nullptr is an error in the test. By returning without doing the permutation,
-        // we expect the caller to fail the test. It still makes sense to report this
-        // as an assert for Debug builds.
-        return;
-    }
-
-    armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::PermuteQueueDescriptor queueDescriptor;
-    queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings};
-    armnn::WorkloadInfo workloadInfo;
-    AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), inputData);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    outputData.resize(outputTensorInfo.GetNumElements());
-    CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
-    inputTensorInfo = outputTensorInfo;
-}
-
-armnn::OriginsDescriptor CreateDescriptorForConcatenation(
-        const std::vector<armnn::TensorInfo> & inputTensorInfos,
-        unsigned int concatDim)
-{
-    std::vector<armnn::TensorShape> shapes;
-    shapes.reserve(inputTensorInfos.size());
-    for (const armnn::TensorInfo& it: inputTensorInfos)
-    {
-        shapes.push_back(it.GetShape());
-    }
-
-    return armnn::CreateDescriptorForConcatenation(shapes.begin(),
-                                                   shapes.end(),
-                                                   concatDim);
-}
-
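A usage sketch for the wrapper above, not part of this patch, using the two shapes from ConcatTest (assumes armnn/Descriptors.hpp is available):

    #include <armnn/Descriptors.hpp>
    #include <armnn/Tensor.hpp>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Concatenating { 2, 6, 3 } and { 1, 6, 3 } along dimension 0 yields the
        // { 3, 6, 3 } output used by ConcatTest above.
        const std::vector<armnn::TensorShape> shapes = { { 2, 6, 3 }, { 1, 6, 3 } };
        const armnn::OriginsDescriptor desc =
            armnn::CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), 0);

        // Expect 2 views; view 1's origin is { 2, 0, 0 }, matching wOrigin2 above.
        std::printf("%u views, view 1 channel origin %u\n",
                    desc.GetNumViews(), desc.GetViewOrigin(1)[0]);
        return 0;
    }
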
-//
-// Concatenation is only supported for the N and C dimensions of NCHW tensors and
-// for the innermost dimension. For tensors with fewer than 4 dimensions we need to
-// make sure that the concat dimension is either the innermost one or at least the
-// third slowest-iterating one.
-//
-
-bool NeedPermuteForConcat(
-        const std::vector<armnn::TensorInfo> & inputTensorInfos,
-        unsigned int concatDim)
-{
-    // See note above. Additionally we expect the input shapes to have the
-    // same number of dimensions.
-    unsigned int nDimensions = 0;
-
-    // Determine the number of dimensions and sanity check them
-    // against test implementation issues.
-    for (auto && tensorInfo : inputTensorInfos)
-    {
-        if (!nDimensions)
-        {
-            nDimensions = tensorInfo.GetShape().GetNumDimensions();
-        }
-        else
-        {
-            BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
-                "Input shapes must have the same number of dimensions");
-        }
-    }
-
-    return (nDimensions < 3 || (nDimensions == 3 && (nDimensions-concatDim) < 3 && (nDimensions-concatDim) != 1));
-}
-
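Spelled out, the predicate above requests a permutation for any tensor with fewer than 3 dimensions, and for 3D tensors only when concatenating along the middle dimension. A standalone sketch, not part of this patch, tabulating the cases under that reading:

    #include <cassert>

    // Same expression as NeedPermuteForConcat above, without the TensorInfo plumbing.
    bool NeedPermuteForConcatSketch(unsigned int nDimensions, unsigned int concatDim)
    {
        return nDimensions < 3 ||
               (nDimensions == 3 && (nDimensions - concatDim) < 3 && (nDimensions - concatDim) != 1);
    }

    int main()
    {
        assert( NeedPermuteForConcatSketch(1, 0)); // 1D: always permute
        assert( NeedPermuteForConcatSketch(2, 1)); // 2D: always permute
        assert(!NeedPermuteForConcatSketch(3, 0)); // 3D, outermost dim: supported
        assert( NeedPermuteForConcatSketch(3, 1)); // 3D, middle dim: permute
        assert(!NeedPermuteForConcatSketch(3, 2)); // 3D, innermost dim: supported
        return 0;
    }
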
-armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape)
-{
-    unsigned int numDims = inputShape.GetNumDimensions();
-    if (numDims >= 3)
-    {
-        // Nothing to do if the inputShape has at least 3 dimensions.
-        return inputShape;
-    }
-
-    std::vector<unsigned int> newDims(size_t(3), 1u);
-    unsigned int expandedBy = 3 - numDims;
-    for (unsigned int i=0; i<numDims; ++i)
-    {
-        newDims[expandedBy+i] = inputShape[i];
-    }
-    return armnn::TensorShape(3u, &newDims[0]);
-}
-
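A usage sketch for the helper above, not part of this patch (assumes armnn/Tensor.hpp and the helper are in scope):

    #include <armnn/Tensor.hpp>
    #include <cstdio>

    int main()
    {
        // Shapes are padded with leading 1s: { 5 } -> { 1, 1, 5 } and
        // { 4, 6 } -> { 1, 4, 6 }; shapes with 3+ dimensions come back unchanged.
        const armnn::TensorShape expanded = ExpandTensorShapeTo3dForPermute(armnn::TensorShape({ 4, 6 }));
        std::printf("%u %u %u\n", expanded[0], expanded[1], expanded[2]); // prints: 1 4 6
        return 0;
    }
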
-void Generate3dPermuteVectorForConcat(
-        unsigned int numDimensions,
-        unsigned int & concatDim,
-        std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations)
-{
-    BOOST_ASSERT_MSG(numDimensions <= 3,
-       "Only dimensions 1,2 and 3 are supported by this helper");
-    unsigned int expandedBy = 3 - numDimensions;
-    unsigned int expandedConcatAxis = concatDim + expandedBy;
-
-    if (expandedConcatAxis == 2)
-    {
-        concatDim = 0;
-        armnn::PermutationVector forwardPermutation({1, 2, 0});
-        armnn::PermutationVector reversePermutation({2, 0, 1});
-        permutations = std::make_pair(forwardPermutation, reversePermutation);
-    }
-    else if (expandedConcatAxis == 1)
-    {
-        concatDim = 0;
-        armnn::PermutationVector forwardPermutation({2, 0, 1});
-        armnn::PermutationVector reversePermutation({1, 2, 0});
-        permutations = std::make_pair(forwardPermutation, reversePermutation);
-    }
-    else
-    {
-        BOOST_ASSERT(expandedConcatAxis == 0);
-        concatDim = 0;
-    }
-}
-
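The two permutation pairs above are mutual inverses: with armnn's convention that mappings[i] gives the destination of source dimension i, applying the forward mapping and then the reverse one returns every dimension to its original position. A standalone check, not part of this patch:

    #include <cassert>

    int main()
    {
        // Pair used when the expanded concat axis is 2; the axis-1 case uses the
        // same two vectors with their roles swapped, so this check covers both.
        const unsigned int forward[3] = { 1, 2, 0 };
        const unsigned int reverse[3] = { 2, 0, 1 };

        for (unsigned int i = 0; i < 3; ++i)
        {
            assert(reverse[forward[i]] == i); // composes to the identity
        }
        return 0;
    }
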
-//
-// Permutes the input tensors so that a supported concatenation can be performed.
-// Tensors with fewer than 3 dimensions are treated as 3D by padding them with
-// dummy dimensions of size 1 at the front. Finally, this function computes the
-// output shape of the permuted, concatenated tensor.
-//
-template <typename T>
-void PermuteInputsForConcat(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        std::vector<armnn::TensorInfo> & inputTensorInfos,
-        std::vector<T *> & inputData,
-        std::vector<std::vector<T>> & inputDataStorage,
-        armnn::PermutationVector & permuteVector,
-        unsigned int & concatDim,
-        armnn::TensorInfo & outputTensorInfo)
-{
-    BOOST_ASSERT_MSG(inputTensorInfos.size() > 1,
-        "Expecting more than one tensor to be concatenated here");
-
-    unsigned int numDims = 0;
-    unsigned int nthInput = 0;
-    const armnn::PermutationVector identity({0, 1, 2});
-
-    std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations =
-        std::make_pair(identity, identity);
-
-    inputDataStorage.resize(inputData.size());
-
-    for (auto && tensorInfo : inputTensorInfos)
-    {
-        if (numDims == 0)
-        {
-            numDims = tensorInfo.GetShape().GetNumDimensions();
-            Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);
-
-            // Store the reverse permutation.
-            permuteVector = permutations.second;
-            BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity),
-                "Test logic error, we don't need permutation, so we shouldn't arrive here");
-        }
-        else
-        {
-            BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
-                "All inputs must have the same number of dimensions");
-        }
-
-        armnn::TensorInfo newTensorInfo = tensorInfo;
-        newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));
-
-        PermuteTensorData<T>(workloadFactory,
-                             memoryManager,
-                             permutations.first,
-                             newTensorInfo,
-                             inputData[nthInput],
-                             inputDataStorage[nthInput]);
-
-        inputData[nthInput] = inputDataStorage[nthInput].data();
-        inputTensorInfos[nthInput] = newTensorInfo;
-
-        ++nthInput;
-    }
-
-    outputTensorInfo.SetShape(
-        armnnUtils::Permuted(
-            ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
-            permutations.first));
-}
-
-
-//
-// This is the counterpart of PermuteInputsForConcat(...): it permutes the
-// output of the concatenation back so it can be checked against the expected
-// output.
-//
-template <typename T>
-void PermuteOutputForConcat(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::TensorInfo & tensorInfo,
-        const armnn::PermutationVector & permuteVector,
-        std::unique_ptr<armnn::ITensorHandle> && inputDataHandle,
-        T * data)
-{
-    BOOST_ASSERT_MSG(data != nullptr, "data must not be null");
-    if (data == nullptr)
-    {
-        // Nullptr is an error in the test. By returning without doing the permutation,
-        // we expect the caller to fail the test. It still makes sense to report this
-        // as an assert for Debug builds.
-        return;
-    }
-
-    armnn::TensorInfo resultTensorInfo = tensorInfo;
-    std::vector<T> inputData(tensorInfo.GetNumElements());
-    std::vector<T> outputData;
-
-    CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());
-
-    PermuteTensorData<T>(workloadFactory,
-                         memoryManager,
-                         permuteVector,
-                         resultTensorInfo,
-                         &inputData[0],
-                         outputData);
-
-    ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
-}
-
-template <typename T>
-void Concatenate(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig,
-    std::initializer_list<T *> inputsOrig,
-    const armnn::TensorInfo& outputTensorInfoOrig,
-    T * output,
-    unsigned int concatDim,
-    bool useSubtensor)
-{
-    BOOST_ASSERT_MSG(output != nullptr, "output must not be null");
-    if (output == nullptr)
-    {
-        // Nullptr is an error in the test. By returning without doing the concatenation,
-        // we expect the caller to fail the test. It still makes sense to report this
-        // as an assert for Debug builds.
-        return;
-    }
-
-    // Saves a copy of the parameters which we might need to change.
-    std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
-    std::vector<T *> inputs            = inputsOrig;
-    armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig;
-
-    armnn::PermutationVector permuteVector{0, 1, 2};
-
-    // Holds and automatically releases memory for the reshaped input data.
-    std::vector<std::vector<T>> tmpInputDataStorage;
-
-    const size_t inputCount = inputTensorInfos.size();
-
-    bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);
-
-    if (needPermuteForConcat)
-    {
-        //
-        // We need to permute the inputs, because concatenation along
-        // the requested axis is not supported.
-        //
-        PermuteInputsForConcat<T>(workloadFactory,
-                                  memoryManager,
-                                  inputTensorInfos,
-                                  inputs,
-                                  tmpInputDataStorage,
-                                  permuteVector,
-                                  concatDim,
-                                  outputTensorInfo);
-    }
-
-    armnn::WorkloadInfo workloadInfo;
-
-    std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
-    inputHandles.reserve(inputCount);
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ConcatQueueDescriptor queueDescriptor;
-    armnn::OriginsDescriptor viewsDescriptor = CreateDescriptorForConcatenation(inputTensorInfos, concatDim);
-    queueDescriptor.m_Parameters = viewsDescriptor;
-
-    if (useSubtensor)
-    {
-        queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
-        for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
-        {
-            queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
-                viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
-        }
-
-        outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-        const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
-        for (unsigned int i = 0; i < inputCount; ++i)
-        {
-            const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i];
-            std::unique_ptr<armnn::ITensorHandle> inputHandle =
-                subTensorsSupported ?
-                    workloadFactory.CreateSubTensorHandle(*outputHandle,
-                                                          inputTensorInfo.GetShape(),
-                                                          queueDescriptor.m_ViewOrigins[i].m_Origin.data()) :
-                    workloadFactory.CreateTensorHandle(inputTensorInfo);
-
-            inputHandles.emplace_back(std::move(inputHandle));
-        }
-
-    }
-    else
-    {
-        for (unsigned int i = 0; i < inputCount; ++i)
-        {
-            std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfos[i]);
-            inputHandles.emplace_back(std::move(inputHandle));
-        }
-    }
-
-    for (unsigned int i = 0; i < inputCount; ++i)
-    {
-        AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
-    }
-
-    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(queueDescriptor, workloadInfo);
-
-    for (auto& inputHandle : inputHandles)
-    {
-        inputHandle->Allocate();
-    }
-
-    outputHandle->Allocate();
-
-    unsigned int nextInputId = 0;
-    for (auto& inputHandle : inputHandles)
-    {
-        CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
-        ++nextInputId;
-    }
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    if (needPermuteForConcat)
-    {
-        PermuteOutputForConcat<T>(workloadFactory,
-                                  memoryManager,
-                                  outputTensorInfo,
-                                  permuteVector,
-                                  std::move(outputHandle),
-                                  output);
-    }
-    else
-    {
-        CopyDataFromITensorHandle(output, outputHandle.get());
-    }
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 1> Concatenation1dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f }));
-    auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f }));
-    auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f }));
-
-    armnn::TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 1> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   0,
-                   true);
-
-    result.output = MakeTensor<T, 1>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 1> Concatenation1dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation1dTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Concatenation2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::TensorInfo& outputTensorInfo,
-    unsigned int dimension,
-    const float qScale,
-    const int32_t qOffset)
-{
-    armnn::TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f,
-    }));
-
-    auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        4.0f, 5.0f, 6.0f,
-
-        // Batch 1
-        13.0f, 14.0f, 15.0f,
-    }));
-
-    auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        7.0f, 8.0f, 9.0f,
-
-        // Batch 1
-        16.0f, 17.0f, 18.0f,
-    }));
-
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   true);
-
-    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Concatenation2dDim0TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 2> result = Concatenation2dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 0, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f,
-
-        // Batch 2
-        4.0f, 5.0f, 6.0f,
-
-        // Batch 3
-        13.0f, 14.0f, 15.0f,
-
-        // Batch 4
-        7.0f, 8.0f, 9.0f,
-
-        // Batch 5
-        16.0f, 17.0f, 18.0f,
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 2> Concatenation2dDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Concatenation2dDim1TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 2> result = Concatenation2dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 1, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 2> Concatenation2dDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
-    auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f,
-    }));
-
-    armnn::TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
-    auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        4.0f, 5.0f, 6.0f,
-
-        // Batch 1
-        13.0f, 14.0f, 15.0f,
-
-        // Batch 2
-        7.0f, 8.0f, 9.0f,
-    }));
-
-    armnn::TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
-    auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        16.0f, 17.0f, 18.0f,
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   0,
-                   true);
-
-    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f,
-
-        // Batch 2
-        4.0f, 5.0f, 6.0f,
-
-        // Batch 3
-        13.0f, 14.0f, 15.0f,
-
-        // Batch 4
-        7.0f, 8.0f, 9.0f,
-
-        // Batch 5
-        16.0f, 17.0f, 18.0f,
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim0DiffInputDimsTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
-    auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f,
-    }));
-
-    armnn::TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
-    auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
-
-        // Batch 1
-        13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
-    }));
-
-    armnn::TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
-    auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        9.0f,
-
-        // Batch 1
-        18.0f
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   1,
-                   true);
-
-    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0
-        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
-
-        // Batch 1
-        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim1DiffInputDimsTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::TensorInfo& outputTensorInfo,
-    unsigned int dimension,
-    bool useSubtensor,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f
-    }));
-
-    auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        7.0f, 8.0f,
-
-        // Batch 0, Channel 1
-        9.0f, 10.0f,
-
-        // Batch 0, Channel 2
-        11.0f, 12.0f,
-
-        // Batch 1, Channel 0
-        25.0f, 26.0f,
-
-        // Batch 1, Channel 1
-        27.0f, 28.0f,
-
-        // Batch 1, Channel 2
-        29.0f, 30.0f
-    }));
-
-    auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        13.0f, 14.0f,
-
-        // Batch 0, Channel 1
-        15.0f, 16.0f,
-
-        // Batch 0, Channel 2
-        17.0f, 18.0f,
-
-        // Batch 1, Channel 0
-        31.0f, 32.0f,
-
-        // Batch 1, Channel 1
-        33.0f, 34.0f,
-
-        // Batch 1, Channel 2
-        35.0f, 36.0f
-    }));
-
-    LayerTestResult<T, 3> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   useSubtensor);
-
-    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dDim0TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 3> result = Concatenation3dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 0, true, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f,
-
-        // Batch 2, Channel 0
-        7.0f, 8.0f,
-
-        // Batch 2, Channel 1
-        9.0f, 10.0f,
-
-        // Batch 2, Channel 2
-        11.0f, 12.0f,
-
-        // Batch 3, Channel 0
-        25.0f, 26.0f,
-
-        // Batch 3, Channel 1
-        27.0f, 28.0f,
-
-        // Batch 3, Channel 2
-        29.0f, 30.0f,
-
-        // Batch 4, Channel 0
-        13.0f, 14.0f,
-
-        // Batch 4, Channel 1
-        15.0f, 16.0f,
-
-        // Batch 4, Channel 2
-        17.0f, 18.0f,
-
-        // Batch 5, Channel 0
-        31.0f, 32.0f,
-
-        // Batch 5, Channel 1
-        33.0f, 34.0f,
-
-        // Batch 5, Channel 2
-        35.0f, 36.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 3> Concatenation3dDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dDim1TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 3> result = Concatenation3dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 1, true, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 0, Channel 3
-        7.0f, 8.0f,
-
-        // Batch 0, Channel 4
-        9.0f, 10.0f,
-
-        // Batch 0, Channel 5
-        11.0f, 12.0f,
-
-        // Batch 0, Channel 6
-        13.0f, 14.0f,
-
-        // Batch 0, Channel 7
-        15.0f, 16.0f,
-
-        // Batch 0, Channel 8
-        17.0f, 18.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f,
-
-        // Batch 1, Channel 3
-        25.0f, 26.0f,
-
-        // Batch 1, Channel 4
-        27.0f, 28.0f,
-
-        // Batch 1, Channel 5
-        29.0f, 30.0f,
-
-        // Batch 1, Channel 6
-        31.0f, 32.0f,
-
-        // Batch 1, Channel 7
-        33.0f, 34.0f,
-
-        // Batch 1, Channel 8
-        35.0f, 36.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 3> Concatenation3dDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dDim2TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 3> result = Concatenation3dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 2, useSubtensor, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 3> Concatenation3dDim2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation3dDim2TestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, useSubtensor, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
-    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-            // Batch 0, Channel 0
-            1.0f, 2.0f,
-
-            // Batch 0, Channel 1
-            3.0f, 4.0f,
-
-            // Batch 0, Channel 2
-            5.0f, 6.0f,
-
-            // Batch 1, Channel 0
-            19.0f, 20.0f,
-
-            // Batch 1, Channel 1
-            21.0f, 22.0f,
-
-            // Batch 1, Channel 2
-            23.0f, 24.0f
-    }));
-
-    armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType, qScale, qOffset);
-    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-            // Batch 0, Channel 0
-            7.0f, 8.0f,
-
-            // Batch 0, Channel 1
-            9.0f, 10.0f,
-
-            // Batch 0, Channel 2
-            11.0f, 12.0f,
-    }));
-
-    armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType, qScale, qOffset);
-    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-            // Batch 0, Channel 0
-            25.0f, 26.0f,
-
-            // Batch 0, Channel 1
-            27.0f, 28.0f,
-
-            // Batch 0, Channel 2
-            29.0f, 30.0f,
-
-            // Batch 1, Channel 0
-            13.0f, 14.0f,
-
-            // Batch 1, Channel 1
-            15.0f, 16.0f,
-
-            // Batch 1, Channel 2
-            17.0f, 18.0f,
-
-            // Batch 2, Channel 0
-            31.0f, 32.0f,
-
-            // Batch 2, Channel 1
-            33.0f, 34.0f,
-
-            // Batch 2, Channel 2
-            35.0f, 36.0f
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
-    LayerTestResult<T, 3> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   0,
-                   true);
-
-    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f,
-
-        // Batch 2, Channel 0
-        7.0f, 8.0f,
-
-        // Batch 2, Channel 1
-        9.0f, 10.0f,
-
-        // Batch 2, Channel 2
-        11.0f, 12.0f,
-
-        // Batch 3, Channel 0
-        25.0f, 26.0f,
-
-        // Batch 3, Channel 1
-        27.0f, 28.0f,
-
-        // Batch 3, Channel 2
-        29.0f, 30.0f,
-
-        // Batch 4, Channel 0
-        13.0f, 14.0f,
-
-        // Batch 4, Channel 1
-        15.0f, 16.0f,
-
-        // Batch 4, Channel 2
-        17.0f, 18.0f,
-
-        // Batch 5, Channel 0
-        31.0f, 32.0f,
-
-        // Batch 5, Channel 1
-        33.0f, 34.0f,
-
-        // Batch 5, Channel 2
-        35.0f, 36.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim0DiffInputDimsTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
-    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f
-    }));
-
-    armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
-    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        7.0f, 8.0f,
-
-        // Batch 0, Channel 1
-        9.0f, 10.0f,
-
-        // Batch 0, Channel 2
-        11.0f, 12.0f,
-
-        // Batch 0, Channel 3
-        25.0f, 26.0f,
-
-        // Batch 1, Channel 0
-        27.0f, 28.0f,
-
-        // Batch 1, Channel 1
-        29.0f, 30.0f,
-
-        // Batch 1, Channel 2
-        13.0f, 14.0f,
-
-        // Batch 1, Channel 3
-        15.0f, 16.0f,
-    }));
-
-    armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
-    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        17.0f, 18.0f,
-
-        // Batch 1, Channel 0
-        31.0f, 32.0f,
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
-    LayerTestResult<T, 3> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   1,
-                   true);
-
-    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 0, Channel 3
-        7.0f, 8.0f,
-
-        // Batch 0, Channel 4
-        9.0f, 10.0f,
-
-        // Batch 0, Channel 5
-        11.0f, 12.0f,
-
-        // Batch 0, Channel 6
-        25.0f, 26.0f,
-
-        // Batch 0, Channel 7
-        17.0f, 18.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f,
-
-        // Batch 1, Channel 3
-        27.0f, 28.0f,
-
-        // Batch 1, Channel 4
-        29.0f, 30.0f,
-
-        // Batch 1, Channel 5
-        13.0f, 14.0f,
-
-        // Batch 1, Channel 6
-        15.0f, 16.0f,
-
-        // Batch 1, Channel 7
-        31.0f, 32.0f,
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim1DiffInputDimsTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
-    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f
-    }));
-
-    armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
-    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        7.0f,
-
-        // Batch 0, Channel 1
-        9.0f,
-
-        // Batch 0, Channel 2
-        11.0f,
-
-        // Batch 1, Channel 0
-        25.0f,
-
-        // Batch 1, Channel 1
-        27.0f,
-
-        // Batch 1, Channel 2
-        29.0f
-    }));
-
-    armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
-    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        13.0f, 14.0f, 50.0f,
-
-        // Batch 0, Channel 1
-        15.0f, 16.0f, 51.0f,
-
-        // Batch 0, Channel 2
-        17.0f, 18.0f, 52.0f,
-
-        // Batch 1, Channel 0
-        31.0f, 32.0f, 53.0f,
-
-        // Batch 1, Channel 1
-        33.0f, 34.0f, 54.0f,
-
-        // Batch 1, Channel 2
-        35.0f, 36.0f, 55.0f,
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
-    LayerTestResult<T, 3> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory, memoryManager,
-                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
-                   { input0.data(), input1.data(), input2.data() },
-                   outputTensorInfo,
-                   output.data(),
-                   2,
-                   useSubtensor);
-
-    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
-
-        // Batch 0, Channel 1
-        3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
-
-        // Batch 0, Channel 2
-        5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
-
-        // Batch 1, Channel 0
-        19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
-
-        // Batch 1, Channel 1
-        21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
-
-        // Batch 1, Channel 2
-        23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation3dDim2DiffInputDimsTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, useSubtensor, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::TensorInfo& outputTensorInfo,
-    unsigned int dimension,
-    bool useSubtensor,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f
-    }));
-
-    auto input1 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f
-    }));
-
-    auto input2 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f,
-        29.0f, 30.0f,
-        31.0f, 32.0f
-    }));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-
-    Concatenate<T>(workloadFactory,
-                   memoryManager,
-                   {inputTensorInfo, inputTensorInfo, inputTensorInfo},
-                   {input0.data(), input1.data(), input2.data()},
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   useSubtensor);
-
-    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDim0TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 0, true, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f,
-
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f,
-        29.0f, 30.0f,
-        31.0f, 32.0f
-    }));
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDim1TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 1, true, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f,
-
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f,
-        29.0f, 30.0f,
-        31.0f, 32.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDim2TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    armnn::TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 2, true, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f,
-
-        9.0f, 10.0f,
-        11.0f, 12.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-        29.0f, 30.0f,
-        31.0f, 32.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDim2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDim2TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDim3TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool useSubtensor)
-{
-    armnn::TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, outputTensorInfo, 3, useSubtensor, qScale, qOffset);
-
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        11.0f, 12.0f,
-        21.0f, 22.0f,
-        3.0f, 4.0f,
-        13.0f, 14.0f,
-        23.0f, 24.0f,
-
-        5.0f, 6.0f,
-        15.0f, 16.0f,
-        25.0f, 26.0f,
-        7.0f, 8.0f,
-        17.0f, 18.0f,
-        27.0f, 28.0f,
-
-        9.0f, 10.0f,
-        19.0f, 20.0f,
-        29.0f, 30.0f,
-        11.0f, 12.0f,
-        21.0f, 22.0f,
-        31.0f, 32.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDim3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation4dDim3TestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0, useSubtensor);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDiffShapeDim0TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    unsigned int dimension = 0;
-    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f
-    }));
-
-    armnn::TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f,
-        29.0f, 30.0f,
-        31.0f, 32.0f
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory,
-                   memoryManager,
-                   {inputTensorInfo0, inputTensorInfo1},
-                   {input0.data(), input1.data()},
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   true);
-
-    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f,
-
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f,
-        29.0f, 30.0f,
-        31.0f, 32.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDiffShapeDim0TestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDiffShapeDim1TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    unsigned int dimension = 1;
-    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f
-    }));
-
-    armnn::TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory,
-                   memoryManager,
-                   {inputTensorInfo0, inputTensorInfo1},
-                   {input0.data(), input1.data()},
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   true);
-
-    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f,
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDiffShapeDim1TestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDiffShapeDim2TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    unsigned int dimension = 2;
-    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f
-    }));
-
-    armnn::TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory,
-                   memoryManager,
-                   {inputTensorInfo0, inputTensorInfo1},
-                   {input0.data(), input1.data()},
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   true);
-
-    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        11.0f, 12.0f,
-        13.0f, 14.0f,
-        15.0f, 16.0f,
-
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        17.0f, 18.0f,
-        19.0f, 20.0f,
-        21.0f, 22.0f,
-
-        9.0f, 10.0f,
-        11.0f, 12.0f,
-        23.0f, 24.0f,
-        25.0f, 26.0f,
-        27.0f, 28.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDiffShapeDim2TestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Concatenation4dDiffShapeDim3TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    bool useSubtensor)
-{
-    unsigned int dimension = 3;
-    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
-
-    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f,
-        3.0f, 4.0f,
-        5.0f, 6.0f,
-        7.0f, 8.0f,
-        9.0f, 10.0f,
-        11.0f, 12.0f
-    }));
-
-    armnn::TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
-
-    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
-        11.0f, 12.0f, 13.0f,
-        14.0f, 15.0f, 16.0f,
-
-        17.0f, 18.0f, 19.0f,
-        20.0f, 21.0f, 22.0f,
-
-        23.0f, 24.0f, 25.0f,
-        26.0f, 27.0f, 28.0f
-    }));
-
-    armnn::TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-
-    std::vector<T> output;
-    output.resize(outputTensorInfo.GetNumElements());
-    Concatenate<T>(workloadFactory,
-                   memoryManager,
-                   {inputTensorInfo0, inputTensorInfo1},
-                   {input0.data(), input1.data()},
-                   outputTensorInfo,
-                   output.data(),
-                   dimension,
-                   useSubtensor);
-
-    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
-        1.0f, 2.0f, 11.0f, 12.0f, 13.0f,
-        3.0f, 4.0f, 14.0f, 15.0f, 16.0f,
-        5.0f, 6.0f, 17.0f, 18.0f, 19.0f,
-        7.0f, 8.0f, 20.0f, 21.0f, 22.0f,
-        9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
-        11.0f, 12.0f, 26.0f, 27.0f, 28.0f
-    }));
-
-    return result;
-}
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation4dDiffShapeDim3TestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, 0.0f, 0, useSubtensor);
-}
-
-LayerTestResult<float, 2> FakeQuantizationTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    constexpr unsigned int width = 2;
-    constexpr unsigned int height = 3;
-
-    const armnn::TensorInfo tensorInfo({ height, width }, armnn::DataType::Float32);
-    auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
-       -10.0f,  -5.0f,
-         0.0f,   5.0f,
-        10.0f,  10.0f
-    }));
-
-    LayerTestResult<float, 2> ret(tensorInfo);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(tensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
-
-    armnn::FakeQuantizationQueueDescriptor data;
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
-    float min = -10.f;
-    float max = 10.f;
-
-    data.m_Parameters.m_Min = min;
-    data.m_Parameters.m_Max = max;
-
-    armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]);
-    armnn::FakeQuantizationQueueDescriptor refData = data;
-    armnn::WorkloadInfo refInfo = info;
-    SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle);
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
-
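-    // The expected values reflect the fake-quantization mapping of [min, max]
-    // onto [0, 255]: -10 maps to 0, 10 maps to 255, and values in between scale
-    // linearly (0 lands on approximately 128).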
-    ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
-        0.0f,     63.0f,
-        128.0f,   191.0f,
-        255.0f,   255.0f
-    }));
-    return ret;
-}
-
-namespace
-{
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> L2NormalizationTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::TensorShape& inputOutputTensorShape,
-    float scale,
-    int32_t offset,
-    const std::vector<float>& inputValues,
-    float outScale,
-    int32_t outOffset,
-    const std::vector<float>& expectedOutputValues,
-    const armnn::DataLayout layout,
-    float epsilon = 1e-12f)
-{
-    const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, ArmnnType, scale, offset);
-    const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, ArmnnType, outScale, outOffset);
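-
-    // L2 normalization scales each element by the inverse L2 norm computed
-    // across the channel dimension; callers precompute the expected values,
-    // typically with CalcInvL2Norm below.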
-
-    // At this point, permute the input data if required.
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
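-    // An armnn::PermutationVector maps source dimension i to destination
-    // dimension vector[i]; { 0, 3, 1, 2 } sends C (dim 1) to dim 3, H to dim 1
-    // and W to dim 2, rearranging NCHW data into NHWC order.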
-    std::vector<float> inputData = inputValues;
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-    }
-
-    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
-                                                         inputTensorInfo.GetQuantizationScale(),
-                                                         inputTensorInfo.GetQuantizationOffset(),
-                                                         inputData));
-
-    std::vector<float> expectedOutputData = expectedOutputValues;
-    if (layout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(expectedOutputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, expectedOutputData.data(), tmp.data(),
-                            sizeof(float));
-        expectedOutputData = tmp;
-    }
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
-                                                               outputTensorInfo.GetQuantizationScale(),
-                                                               outputTensorInfo.GetQuantizationOffset(),
-                                                               expectedOutputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::L2NormalizationQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Eps = epsilon;
-    descriptor.m_Parameters.m_DataLayout = layout;
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-
-    return result;
-}
-
-float CalcInvL2Norm(std::initializer_list<float> elements)
-{
-    const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f,
-        [](float acc, float element) { return acc + element * element; });
-    return 1.0f / sqrtf(reduction);
-}
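-
-// For instance, CalcInvL2Norm({ 3.0f, 4.0f }) = 1 / sqrtf(9 + 16) = 0.2f, the
-// factor each element is multiplied by during L2 normalization.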
-
-} // anonymous namespace
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> Pad2dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    const float customPaddingValue)
-{
-    const armnn::TensorShape inputShape{ 3, 3 };
-    const armnn::TensorShape outputShape{ 7, 7 };
-
-    const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
-    const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
-
-    std::vector<T> inputValues(
-    QuantizedVector<T>(qScale, qOffset,
-    {
-      // Height (3) x Width (3)
-      4, 8, 6,
-      7, 4, 4,
-      3, 2, 4
-    }));
-
-    const auto p = customPaddingValue;
-    std::vector<T> expectedOutputValues(
-    QuantizedVector<T>(qScale, qOffset,
-    {
-      p, p, p, p, p, p, p,
-      p, p, p, p, p, p, p,
-      p, p, 4, 8, 6, p, p,
-      p, p, 7, 4, 4, p, p,
-      p, p, 3, 2, 4, p, p,
-      p, p, p, p, p, p, p,
-      p, p, p, p, p, p, p
-    }));
-
-    auto inputTensor = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>(inputValues));
-
-    LayerTestResult<T, 2> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>(expectedOutputValues));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::PadQueueDescriptor descriptor;
-
-    std::vector<std::pair<unsigned int, unsigned int>> padList;
-    padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
-    padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
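-
-    // Each (before, after) pair pads one dimension: { 2, 2 } on both axes grows
-    // the 3x3 input to (2 + 3 + 2) x (2 + 3 + 2) = 7x7, matching outputShape.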
-
-    descriptor.m_Parameters.m_PadList = padList;
-    descriptor.m_Parameters.m_PadValue = customPaddingValue;
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
-
-    return result;
-}
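Each entry in m_PadList is a (before, after) pair for one tensor dimension, so the padded extent of a dimension is before + inputDim + after; the 3x3 input above with (2,2) padding on both dimensions therefore becomes 7x7. A minimal sketch of that shape arithmetic (PaddedShape is illustrative, not part of Arm NN):

    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    // Each pad-list entry is a (before, after) pair for one tensor dimension,
    // so outputDim = before + inputDim + after.
    std::vector<unsigned int> PaddedShape(const std::vector<unsigned int>& inputShape,
                                          const std::vector<std::pair<unsigned int, unsigned int>>& padList)
    {
        std::vector<unsigned int> outputShape;
        for (std::size_t i = 0; i < inputShape.size(); ++i)
        {
            outputShape.push_back(padList[i].first + inputShape[i] + padList[i].second);
        }
        return outputShape;
    }

    int main()
    {
        // The 2D case above: 3x3 input, (2,2) padding on both dimensions -> 7x7.
        auto out = PaddedShape({ 3, 3 }, { { 2, 2 }, { 2, 2 } });
        std::cout << out[0] << "x" << out[1] << std::endl; // prints 7x7
        return 0;
    }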
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 3> Pad3dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    const armnn::TensorShape inputShape{ 2, 2, 2 };
-    const armnn::TensorShape outputShape{ 3, 5, 6 };
-
-    const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
-    const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
-
-    std::vector<T> inputValues(
-      QuantizedVector<T>(qScale, qOffset,
-    {
-        // Channel 0, Height (2) x Width (2)
-        0, 4,
-        2, 5,
-
-        // Channel 1, Height (2) x Width (2)
-        6, 1,
-        5, 2
-    }));
-
-    std::vector<T> expectedOutputValues(
-      QuantizedVector<T>(qScale, qOffset,
-    {
-
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 4, 0, 0,
-        0, 0, 2, 5, 0, 0,
-        0, 0, 0, 0, 0, 0,
-
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0,
-        0, 0, 6, 1, 0, 0,
-        0, 0, 5, 2, 0, 0,
-        0, 0, 0, 0, 0, 0,
-
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0
-
-    }));
-
-    auto inputTensor = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>(inputValues));
-
-    LayerTestResult<T, 3> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(expectedOutputValues));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::PadQueueDescriptor descriptor;
-
-    std::vector<std::pair<unsigned int, unsigned int>> padList;
-    padList.push_back(std::pair<unsigned int, unsigned int>(0,1));
-    padList.push_back(std::pair<unsigned int, unsigned int>(2,1));
-    padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
-
-    descriptor.m_Parameters.m_PadList = padList;
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> Pad4dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    const armnn::TensorShape inputShape{ 2, 2, 3, 2 };
-    const armnn::TensorShape outputShape{ 4, 5, 7, 4 };
-
-    const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
-    const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
-
-    std::vector<T> inputValues(
-      QuantizedVector<T>(qScale, qOffset,
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-        0, 1,
-        2, 3,
-        4, 5,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-        6, 7,
-        8, 9,
-        10, 11,
-
-        // Batch 1, Channel 0, Height (3) x Width (2)
-        12, 13,
-        14, 15,
-        16, 17,
-
-        // Batch 1, Channel 1, Height (3) x Width (2)
-        18, 19,
-        20, 21,
-        22, 23
-    }));
-
-    std::vector<T> expectedOutputValues(
-      QuantizedVector<T>(qScale, qOffset,
-    {
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 1, 0,
-        0, 2, 3, 0,
-        0, 4, 5, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 6, 7, 0,
-        0, 8, 9, 0,
-        0, 10, 11, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 12, 13, 0,
-        0, 14, 15, 0,
-        0, 16, 17, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 18, 19, 0,
-        0, 20, 21, 0,
-        0, 22, 23, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0
-    }));
-
-    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(inputValues));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(expectedOutputValues));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::PadQueueDescriptor descriptor;
-
-    std::vector<std::pair<unsigned int, unsigned int>> padList;
-    padList.push_back(std::pair<unsigned int, unsigned int>(1,1));
-    padList.push_back(std::pair<unsigned int, unsigned int>(2,1));
-    padList.push_back(std::pair<unsigned int, unsigned int>(3,1));
-    padList.push_back(std::pair<unsigned int, unsigned int>(1,1));
-
-    descriptor.m_Parameters.m_PadList = padList;
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-
-    return result;
-}
-
-LayerTestResult<uint8_t, 2> PadUint82dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-LayerTestResult<uint8_t, 2> PadUint82dCustomPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0, 1.0f);
-}
-
-LayerTestResult<uint8_t, 3> PadUint83dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad3dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-LayerTestResult<uint8_t, 4> PadUint84dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad4dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-
-template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 2>
-Pad2dTestCommon<armnn::DataType::QuantisedSymm16>(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    const float customPaddingValue);
-
-template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 3>
-Pad3dTestCommon<armnn::DataType::QuantisedSymm16>(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset);
-
-template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-Pad4dTestCommon<armnn::DataType::QuantisedSymm16>(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset);
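The statements above are explicit template instantiations: the Pad*dTestCommon bodies live in this .cpp file, so each specialization that other translation units need (here, presumably the QuantisedSymm16 pad tests) must be instantiated by hand. A minimal sketch of the pattern:

    // Explicit instantiation: the template body can live in a .cpp file as long
    // as every specialization needed elsewhere is instantiated by hand, exactly
    // like the Pad*dTestCommon<QuantisedSymm16> instantiations above.
    template <typename T>
    T AddOne(T value)
    {
        return value + T(1);
    }

    template int AddOne<int>(int);       // forces code generation for T = int
    template float AddOne<float>(float); // and for T = float

    int main()
    {
        return AddOne(41) == 42 ? 0 : 1;
    }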
-
-LayerTestResult<float, 2> PadFloat322dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-LayerTestResult<float, 2> PadFloat322dCustomPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0, 1.0f);
-}
-
-LayerTestResult<float, 3> PadFloat323dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad3dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-LayerTestResult<float, 4> PadFloat324dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Pad4dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> L2NormalizationEpsilonTestCommon(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float scale,
-        int32_t offset,
-        float outScale,
-        int32_t outOffset,
-        const armnn::DataLayout layout,
-        float epsilon)
-{
-    // Width: 1
-    // Height: 1
-    // Channels: 3
-    // BatchSize: 1
-    unsigned int numberOfBatches = 1;
-    unsigned int numberOfChannels = 3;
-    unsigned int height = 1;
-    unsigned int width = 1;
-
-    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
-            numberOfBatches, numberOfChannels, height, width, layout);
-
-    // 0.00000001^2 + 0.00000002^2 + 0.00000003^2 < 1e-12
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (1) x Width (1)
-        0.00000001f,
-
-        // Batch 0, Channel 1, Height (1) x Width (1)
-        0.00000002f,
-
-        // Batch 0, Channel 2, Height (1) x Width (1)
-        0.00000003f,
-    };
-
-    const float approxInvL2Norm = 1.f / sqrtf(epsilon);
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (1) x Width (1)
-        0.00000001f * approxInvL2Norm,
-        0.00000002f * approxInvL2Norm,
-        0.00000003f * approxInvL2Norm,
-    };
-
-    return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
-                                              inputValues, outScale, outOffset, expectedOutputValues, layout,
-                                              epsilon);
-}
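The expected values above only hold if the backend clamps the sum of squares from below with epsilon, i.e. uses sqrt(max(sumOfSquares, epsilon)) as the denominator; that is what approxInvL2Norm = 1/sqrt(epsilon) relies on. A minimal sketch of the arithmetic, assuming that clamping behaviour:

    #include <algorithm>
    #include <cassert>
    #include <cmath>

    // The epsilon path in L2 normalization: the denominator is
    // sqrt(max(sumOfSquares, eps)), so tiny inputs are scaled by 1/sqrt(eps).
    float InvNormWithEpsilon(float sumOfSquares, float eps)
    {
        return 1.0f / std::sqrt(std::max(sumOfSquares, eps));
    }

    int main()
    {
        // Sum of squares above: (1e-8)^2 + (2e-8)^2 + (3e-8)^2 = 1.4e-15 < 1e-12,
        // so with eps = 1e-12 the scale factor is 1/sqrt(1e-12) = 1e6.
        const float scale = InvNormWithEpsilon(1.4e-15f, 1e-12f);
        assert(std::abs(scale - 1e6f) < 1.0f);
        return 0;
    }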
-
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> L2Normalization1dTestCommon(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float scale,
-        int32_t offset,
-        float outScale,
-        int32_t outOffset,
-        const armnn::DataLayout layout)
-{
-    // Width: 1
-    // Height: 1
-    // Channels: 10
-    // BatchSize: 1
-    unsigned int numberOfBatches = 1;
-    unsigned int numberOfChannels = 10;
-    unsigned int height = 1;
-    unsigned int width = 1;
-
-
-    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
-            numberOfBatches, numberOfChannels, height, width, layout);
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (1) x Width (1)
-        1.0f,
-
-        // Batch 0, Channel 1, Height (1) x Width (1)
-        2.0f,
-
-        // Batch 0, Channel 2, Height (1) x Width (1)
-        3.0f,
-
-        // Batch 0, Channel 3, Height (1) x Width (1)
-        4.0f,
-
-        // Batch 0, Channel 4, Height (1) x Width (1)
-        5.0f,
-
-        // Batch 0, Channel 5, Height (1) x Width (1)
-        6.0f,
-
-        // Batch 0, Channel 6, Height (1) x Width (1)
-        7.0f,
-
-        // Batch 0, Channel 7, Height (1) x Width (1)
-        8.0f,
-
-        // Batch 0, Channel 8, Height (1) x Width (1)
-        9.0f,
-
-        // Batch 0, Channel 9, Height (1) x Width (1)
-        10.0f
-    };
-    const float approxInvL2Norm = 0.050964719f;
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (1) x Width (1)
-        1.0f * approxInvL2Norm,
-        2.0f * approxInvL2Norm,
-        3.0f * approxInvL2Norm,
-        4.0f * approxInvL2Norm,
-        5.0f * approxInvL2Norm,
-        6.0f * approxInvL2Norm,
-        7.0f * approxInvL2Norm,
-        8.0f * approxInvL2Norm,
-        9.0f * approxInvL2Norm,
-        10.0f * approxInvL2Norm
-    };
-
-
-    return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
-                                              inputValues, outScale, outOffset, expectedOutputValues, layout);
-}
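The magic constant 0.050964719 is simply 1/sqrt(1^2 + 2^2 + ... + 10^2) = 1/sqrt(385), which a quick standalone check confirms:

    #include <cassert>
    #include <cmath>

    int main()
    {
        // 1^2 + 2^2 + ... + 10^2 = 385, so the constant above is 1/sqrt(385).
        const float invNorm = 1.0f / std::sqrt(385.0f);
        assert(std::abs(invNorm - 0.050964719f) < 1e-7f);
        return 0;
    }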
-
-LayerTestResult<float, 4> L2NormalizationDefaultEpsilonTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout)
-{
-    // Dummy descriptor to get the default value of epsilon.
-    armnn::L2NormalizationDescriptor descriptor;
-
-    return L2NormalizationEpsilonTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
-                                                                      layout, descriptor.m_Eps);
-}
-
-LayerTestResult<float, 4> L2NormalizationNonDefaultEpsilonTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout)
-{
-    return L2NormalizationEpsilonTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
-                                                                      layout, 1e-9f);
-}
-
-LayerTestResult<float, 4> L2Normalization1dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization1dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
-                                                                 layout);
-}
-
-LayerTestResult<int16_t, 4> L2Normalization1dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization1dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
-                                                                         layout);
-}
-
-LayerTestResult<uint8_t, 4> L2Normalization1dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization1dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
-                                                                         1.f/128, 128, layout);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> L2Normalization2dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float scale,
-    int32_t offset,
-    float outScale,
-    int32_t outOffset,
-    const armnn::DataLayout layout)
-{
-    // Width: 5
-    // Height: 1
-    // Channels: 2
-    // BatchSize: 1
-    unsigned int numberOfBatches = 1;
-    unsigned int numberOfChannels = 2;
-    unsigned int height = 1;
-    unsigned int width = 5;
-
-    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
-            numberOfBatches, numberOfChannels, height, width, layout);
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (1) x Width (5)
-        1.0f, 3.0f, 5.0f, 7.0f,  9.0f,
-
-        // Batch 0, Channel 1, Height (1) x Width (5)
-        2.0f, 4.0f, 6.0f, 8.0f, 10.0f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (1) x Width (5)
-        1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
-        3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
-        5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
-        7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
-        9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
-
-        // Batch 0, Channel 1, Height (1) x Width (5)
-        2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
-        4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
-        6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
-        8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
-        10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
-    };
-
-    return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
-                                              inputValues, outScale, outOffset, expectedOutputValues, layout);
-}
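In this 1x2x1x5 case, normalization runs across the channel dimension, so each width position pairs one value from channel 0 with one from channel 1; for the first position the pair (1, 2) normalizes to 1/sqrt(5) and 2/sqrt(5). A quick standalone check of that arithmetic:

    #include <cassert>
    #include <cmath>

    int main()
    {
        // First width position above: the channel pair (1, 2) normalizes to
        // 1/sqrt(5) and 2/sqrt(5) respectively.
        const float invNorm = 1.0f / std::sqrt(1.0f * 1.0f + 2.0f * 2.0f);
        assert(std::abs(1.0f * invNorm - 0.44721f) < 1e-4f);
        assert(std::abs(2.0f * invNorm - 0.89443f) < 1e-4f);
        return 0;
    }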
-
-LayerTestResult<float, 4> L2Normalization2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
-                                                                 layout);
-}
-
-LayerTestResult<int16_t, 4> L2Normalization2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
-                                                                         layout);
-}
-
-LayerTestResult<uint8_t, 4> L2Normalization2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
-                                                                         1.f/128, 128, layout);
-}
-
-LayerTestResult<float, 2> L2Normalization2dShapeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const armnn::DataLayout layout = armnn::DataLayout::NHWC;
-    const armnn::TensorShape inputOutputTensorShape = armnn::TensorShape({ 5, 2 });
-
-    std::vector<float> inputData
-    {
-        1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f
-    };
-    std::vector<float> expectedOutputData
-    {
-        1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
-        2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
-        3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
-        4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
-        5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
-        6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
-        7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
-        8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
-        9.0f  * CalcInvL2Norm({ 9.0f, 10.0f }),
-        10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
-    };
-
-    const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
-    const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
-
-    auto inputTensor = MakeTensor<float, 2>(inputTensorInfo, QuantizedVector<float>(
-                                                             inputTensorInfo.GetQuantizationScale(),
-                                                             inputTensorInfo.GetQuantizationOffset(),
-                                                             inputData));
-
-    LayerTestResult<float, 2> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, QuantizedVector<float>(
-                                                                   outputTensorInfo.GetQuantizationScale(),
-                                                                   outputTensorInfo.GetQuantizationOffset(),
-                                                                   expectedOutputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::L2NormalizationQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Eps = 1e-12f;
-    descriptor.m_Parameters.m_DataLayout = layout;
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
-
-    workload->PostAllocationConfigure();
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> L2Normalization3dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float scale,
-    int32_t offset,
-    float outScale,
-    int32_t outOffset,
-    const armnn::DataLayout layout)
-{
-    // Width: 3
-    // Height: 4
-    // Channels: 2
-    // BatchSize: 1
-    unsigned int numberOfBatches = 1;
-    unsigned int numberOfChannels = 2;
-    unsigned int height = 4;
-    unsigned int width = 3;
-
-    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
-            numberOfBatches, numberOfChannels, height, width, layout);
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (4) x Width (3)
-        119.0f,  21.0f, 150.0f,
-        149.0f,  32.0f, 179.0f,
-        15.0f, 227.0f, 141.0f,
-        147.0f, 199.0f, 220.0f,
-
-        // Batch 0, Channel 1, Height (4) x Width (3)
-        110.0f, 140.0f,  73.0f,
-        211.0f, 212.0f,  89.0f,
-        24.0f, 138.0f, 188.0f,
-        162.0f,  12.0f, 161.0f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (4) x Width (3)
-        119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
-        21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
-        150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
-        149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
-        32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
-        179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
-        15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
-        227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
-        141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
-        147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
-        199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
-        220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
-
-        // Batch 0, Channel 1, Height (4) x Width (3)
-        110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
-        140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
-        73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
-        211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
-        212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
-        89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
-        24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
-        138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
-        188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
-        162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
-        12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
-        161.0f * CalcInvL2Norm({ 220.0f, 161.0f })
-    };
-
-    return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
-                                              inputValues, outScale, outOffset, expectedOutputValues, layout);
-}
-
-LayerTestResult<float, 4> L2Normalization3dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization3dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
-                                                                 layout);
-}
-
-LayerTestResult<int16_t, 4> L2Normalization3dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization3dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
-                                                                         layout);
-}
-
-LayerTestResult<uint8_t, 4> L2Normalization3dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization3dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
-                                                                         1.f/128, 128, layout);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> L2Normalization4dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float scale,
-    int32_t offset,
-    float outScale,
-    int32_t outOffset,
-    const armnn::DataLayout layout)
-{
-    // Width: 3
-    // Height: 4
-    // Channels: 3
-    // BatchSize: 2
-    unsigned int numberOfBatches = 2;
-    unsigned int numberOfChannels = 3;
-    unsigned int height = 4;
-    unsigned int width = 3;
-
-    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
-            numberOfBatches, numberOfChannels, height, width, layout);
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (4) x Width (3)
-        235.0f,  46.0f, 178.0f,
-        100.0f, 123.0f,  19.0f,
-        172.0f,  74.0f, 250.0f,
-        6.0f, 195.0f,  80.0f,
-
-        // Batch 0, Channel 1, Height (4) x Width (3)
-        113.0f,  95.0f, 202.0f,
-        77.0f, 114.0f,  71.0f,
-        122.0f, 246.0f, 166.0f,
-        82.0f,  28.0f,  37.0f,
-
-        // Batch 0, Channel 2, Height (4) x Width (3)
-        56.0f, 170.0f, 162.0f,
-        194.0f,  89.0f, 254.0f,
-        12.0f, 209.0f, 200.0f,
-        1.0f,  64.0f,  54.0f,
-
-        // Batch 1, Channel 0, Height (4) x Width (3)
-        67.0f,  90.0f,  49.0f,
-        7.0f, 163.0f,  18.0f,
-        25.0f, 117.0f, 103.0f,
-        247.0f,  59.0f, 189.0f,
-
-        // Batch 1, Channel 1, Height (4) x Width (3)
-        239.0f, 104.0f, 199.0f,
-        17.0f, 124.0f, 153.0f,
-        222.0f, 217.0f, 75.0f,
-        32.0f, 126.0f, 21.0f,
-
-        // Batch 1, Channel 2, Height (4) x Width (3)
-        97.0f, 145.0f, 215.0f,
-        115.0f, 116.0f, 238.0f,
-        226.0f,  16.0f, 132.0f,
-        92.0f, 125.0f,  88.0f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (4) x Width (3)
-        235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
-        46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
-        178.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
-        100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
-        123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
-        19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
-        172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
-        74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
-        250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
-        6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
-        195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
-        80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
-
-        // Batch 0, Channel 1, Height (4) x Width (3)
-        113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
-        95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
-        202.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
-        77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
-        114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
-        71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
-        122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
-        246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
-        166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
-        82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
-        28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
-        37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
-
-        // Batch 0, Channel 2, Height (4) x Width (3)
-        56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
-        170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
-        162.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
-        194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
-        89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
-        254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
-        12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
-        209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
-        200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
-        1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
-        64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
-        54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
-
-        // Batch 1, Channel 0, Height (4) x Width (3)
-        67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
-        90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
-        49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
-        7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
-        163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
-        18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
-        25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
-        117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
-        103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
-        247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
-        59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
-        189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
-
-        // Batch 1, Channel 1, Height (4) x Width (3)
-        239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
-        104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
-        199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
-        17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
-        124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
-        153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
-        222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
-        217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
-        75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
-        32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
-        126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
-        21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
-
-        // Batch 1, Channel 2, Height (4) x Width (3)
-        97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
-        145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
-        215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
-        115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
-        116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
-        238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
-        226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
-        16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
-        132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
-        92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
-        125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
-        88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f })
-    };
-
-    return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
-                                              inputValues, outScale, outOffset, expectedOutputValues, layout);
-}
-
-LayerTestResult<float, 4> L2Normalization4dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization4dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
-                                                                 layout);
-}
-
-LayerTestResult<int16_t, 4> L2Normalization4dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization4dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
-                                                                         layout);
-}
-
-LayerTestResult<uint8_t, 4> L2Normalization4dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout)
-{
-    return L2Normalization4dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
-                                                                         1.f/128, 128, layout);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ConstantTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset)
-{
-    constexpr unsigned int inputWidth = 3;
-    constexpr unsigned int inputHeight = 4;
-    constexpr unsigned int inputChannels = 3;
-    constexpr unsigned int inputBatchSize = 2;
-
-    constexpr unsigned int outputWidth = inputWidth;
-    constexpr unsigned int outputHeight = inputHeight;
-    constexpr unsigned int outputChannels = inputChannels;
-    constexpr unsigned int outputBatchSize = inputBatchSize;
-
-    armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
-                                        ArmnnType, qScale, qOffset);
-
-    armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
-                                         ArmnnType, qScale, qOffset);
-
-    // Set quantization parameters if the requested type is a quantized type.
-    if(armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(qScale);
-        inputTensorInfo.SetQuantizationOffset(qOffset);
-        outputTensorInfo.SetQuantizationScale(qScale);
-        outputTensorInfo.SetQuantizationOffset(qOffset);
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, {
-        // Batch 0, Channel 0
-        235.0f,  46.0f, 178.0f,
-        100.0f, 123.0f,  19.0f,
-        172.0f,  74.0f, 250.0f,
-          6.0f, 195.0f,  80.0f,
-
-        // Batch 0, Channel 1
-        113.0f,  95.0f, 202.0f,
-         77.0f, 114.0f,  71.0f,
-        122.0f, 246.0f, 166.0f,
-         82.0f,  28.0f,  37.0f,
-
-        // Batch 0, Channel 2
-         56.0f, 170.0f, 162.0f,
-        194.0f,  89.0f, 254.0f,
-         12.0f, 209.0f, 200.0f,
-          1.0f,  64.0f,  54.0f,
-
-        // Batch 1, Channel 0
-         67.0f,  90.0f,  49.0f,
-          7.0f, 163.0f,  18.0f,
-         25.0f, 117.0f, 103.0f,
-        247.0f,  59.0f, 189.0f,
-
-        // Batch 1, Channel 1
-        239.0f, 104.0f, 199.0f,
-         17.0f, 124.0f, 153.0f,
-        222.0f, 217.0f, 75.0f,
-         32.0f, 126.0f, 21.0f,
-
-        // Batch 1, Channel 2
-         97.0f, 145.0f, 215.0f,
-        115.0f, 116.0f, 238.0f,
-        226.0f,  16.0f, 132.0f,
-         92.0f, 125.0f,  88.0f,
-    })));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = input;
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo);
-    AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]);
-
-    armnn::ConstantQueueDescriptor descriptor;
-    descriptor.m_LayerOutput = &constantTensor;
-
-    armnn::WorkloadInfo info;
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info);
-
-    outputHandle->Allocate();
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-LayerTestResult<float, 4> ConstantTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return ConstantTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
-}
-
-LayerTestResult<int16_t, 4> ConstantInt16SimpleQuantizationScaleNoOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return ConstantTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-LayerTestResult<uint8_t, 4> ConstantUint8SimpleQuantizationScaleNoOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return ConstantTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
-}
-
-LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    unsigned int outputWidth = 3;
-    unsigned int outputHeight = 6;
-    unsigned int outputChannels = 3;
-
-    unsigned int inputWidth1 = 3;
-    unsigned int inputHeight1 = 6;
-    unsigned int inputChannels1 = 2;
-
-    unsigned int inputWidth2 = 3;
-    unsigned int inputHeight2 = 6;
-    unsigned int inputChannels2 = 1;
-
-    // Defines the tensor descriptors.
-    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
-    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
-    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
-
-    // Quantized input1 tensor. Range [-3, 1]
-    const float inputScale1 = 0.015686f;
-    const int32_t inputOffset1 = 192;
-
-    auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27,
-        28, 29, 30,
-        31, 32, 33,
-        34, 35, 36,
-    })
-    );
-
-    // Quantized input2 tensor. Range [-1, 4]
-    const float inputScale2 = 0.019608f;
-    const int32_t inputOffset2 = 50;
-
-    auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
-    {
-        37, 38, 39,
-        40, 41, 42,
-        43, 44, 45,
-        46, 47, 48,
-        49, 50, 51,
-        52, 53, 54,
-    })
-    );
-
-    // Output has the same quantization parameters as input1,
-    // so that only the requantization of input2 is required
-    const float outputScale = 0.015686f;
-    const int32_t outputOffset = 192;
-
-    LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
-
-    ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27,
-        28, 29, 30,
-        31, 32, 33,
-        34, 35, 36,
-
-        176, 177, 178,
-        179, 181, 182,
-        183, 184, 186,
-        187, 188, 189,
-        191, 192, 193,
-        195, 196, 197,
-    })
-    );
-
-    outputTensorInfo.SetQuantizationScale(outputScale);
-    outputTensorInfo.SetQuantizationOffset(outputOffset);
-    inputTensorInfo1.SetQuantizationScale(inputScale1);
-    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
-    inputTensorInfo2.SetQuantizationScale(inputScale2);
-    inputTensorInfo2.SetQuantizationOffset(inputOffset2);
-
-    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
-    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
-
-    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
-    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
-            subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo1);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
-            subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo2);
-
-    armnn::ConcatQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_ViewOrigins.push_back(window1);
-    data.m_ViewOrigins.push_back(window2);
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
-
-    inputHandle1->Allocate();
-    inputHandle2->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
-
-    return ret;
-}
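Because the output reuses input1's quantization parameters, only input2's values change: each is dequantized with (inputScale2, inputOffset2) and requantized with (outputScale, outputOffset). A minimal sketch of that arithmetic, assuming round-half-away-from-zero (std::lround); it reproduces the requantized values in outputExpected above, e.g. 37 -> 176:

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Requantize a QAsymm8 value from one (scale, offset) pair to another:
    // dequantize to a real value, then quantize with the output parameters.
    uint8_t Requantize(uint8_t in, float inScale, int32_t inOffset,
                       float outScale, int32_t outOffset)
    {
        const float real = inScale * static_cast<float>(static_cast<int32_t>(in) - inOffset);
        return static_cast<uint8_t>(std::lround(real / outScale) + outOffset);
    }

    int main()
    {
        // First element of input2 above: 37 with (0.019608, 50) becomes 176
        // with the output parameters (0.015686, 192).
        assert(Requantize(37, 0.019608f, 50, 0.015686f, 192) == 176);
        return 0;
    }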
-
-LayerTestResult<uint8_t, 3> ConcatUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    unsigned int outputWidth = 3;
-    unsigned int outputHeight = 6;
-    unsigned int outputChannels = 3;
-
-    unsigned int inputWidth1 = 3;
-    unsigned int inputHeight1 = 6;
-    unsigned int inputChannels1 = 2;
-
-    unsigned int inputWidth2 = 3;
-    unsigned int inputHeight2 = 6;
-    unsigned int inputChannels2 = 1;
-
-    // Defines the tensor descriptors.
-    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
-    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
-    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
-
-    // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
-    const float scale = 0.13497836f;
-    const int32_t offset = -7;
-
-    outputTensorInfo.SetQuantizationScale(scale);
-    outputTensorInfo.SetQuantizationOffset(offset);
-    inputTensorInfo1.SetQuantizationScale(scale);
-    inputTensorInfo1.SetQuantizationOffset(offset);
-    inputTensorInfo2.SetQuantizationScale(scale);
-    inputTensorInfo2.SetQuantizationOffset(offset);
-
-    LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
-
-    ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
-        {
-            1, 2, 3,
-            4, 5, 6,
-            7, 8, 9,
-            10, 11, 12,
-            13, 14, 15,
-            16, 17, 18,
-
-            19, 20, 21,
-            22, 23, 24,
-            25, 26, 27,
-            28, 29, 30,
-            31, 32, 33,
-            34, 35, 36,
-
-            37, 38, 39,
-            40, 41, 42,
-            43, 44, 45,
-            46, 47, 48,
-            49, 50, 51,
-            52, 53, 54,
-        })
-    );
-
-    auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27,
-        28, 29, 30,
-        31, 32, 33,
-        34, 35, 36,
-    })
-    );
-
-    auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
-    {
-        37, 38, 39,
-        40, 41, 42,
-        43, 44, 45,
-        46, 47, 48,
-        49, 50, 51,
-        52, 53, 54,
-    })
-    );
-
-    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
-    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
-
-    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
-    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
-
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
-        subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo1);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
-        subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo2);
-
-
-    armnn::ConcatQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_ViewOrigins.push_back(window1);
-    data.m_ViewOrigins.push_back(window2);
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
-
-    inputHandle1->Allocate();
-    inputHandle2->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
-
-    return ret;
-}
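The view origins decide where each input lands in the concatenated output: outputCoord[d] = viewOrigin[d] + inputCoord[d], so wOrigin2 = { 2, 0, 0 } places input2 at channel offset 2, directly after input1's two channels. A minimal sketch of that mapping (ToOutputCoord is illustrative, not an Arm NN API):

    #include <cassert>
    #include <vector>

    // A view origin is the offset of an input view inside the concatenated
    // output: outputCoord[d] = viewOrigin[d] + inputCoord[d].
    std::vector<unsigned int> ToOutputCoord(const std::vector<unsigned int>& viewOrigin,
                                            const std::vector<unsigned int>& inputCoord)
    {
        std::vector<unsigned int> out(viewOrigin.size());
        for (unsigned int d = 0; d < viewOrigin.size(); ++d)
        {
            out[d] = viewOrigin[d] + inputCoord[d];
        }
        return out;
    }

    int main()
    {
        // Element (0, 0, 0) of input2 lands at channel 2 of the output,
        // because wOrigin2 = { 2, 0, 0 } above.
        auto coord = ToOutputCoord({ 2, 0, 0 }, { 0, 0, 0 });
        assert(coord[0] == 2 && coord[1] == 0 && coord[2] == 0);
        return 0;
    }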
-
-LayerTestResult<uint16_t, 3> ConcatUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    unsigned int outputWidth = 3;
-    unsigned int outputHeight = 6;
-    unsigned int outputChannels = 3;
-
-    unsigned int inputWidth1 = 3;
-    unsigned int inputHeight1 = 6;
-    unsigned int inputChannels1 = 2;
-
-    unsigned int inputWidth2 = 3;
-    unsigned int inputHeight2 = 6;
-    unsigned int inputChannels2 = 1;
-
-    // Defines the tensor descriptors.
-    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedSymm16);
-    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedSymm16);
-    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedSymm16);
-
-    // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
-    const float scale = 0.13497836f;
-    const int32_t offset = -7;
-
-    outputTensorInfo.SetQuantizationScale(scale);
-    outputTensorInfo.SetQuantizationOffset(offset);
-    inputTensorInfo1.SetQuantizationScale(scale);
-    inputTensorInfo1.SetQuantizationOffset(offset);
-    inputTensorInfo2.SetQuantizationScale(scale);
-    inputTensorInfo2.SetQuantizationOffset(offset);
-
-    LayerTestResult<uint16_t, 3> ret(outputTensorInfo);
-
-    ret.outputExpected = MakeTensor<uint16_t, 3>(outputTensorInfo, std::vector<uint16_t>(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27,
-        28, 29, 30,
-        31, 32, 33,
-        34, 35, 36,
-
-        37, 38, 39,
-        40, 41, 42,
-        43, 44, 45,
-        46, 47, 48,
-        49, 50, 51,
-        52, 53, 54,
-    }));
-
-    auto input1 = MakeTensor<uint16_t, 3>(inputTensorInfo1, std::vector<uint16_t>(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27,
-        28, 29, 30,
-        31, 32, 33,
-        34, 35, 36,
-    }));
-
-    auto input2 = MakeTensor<uint16_t, 3>(inputTensorInfo2, std::vector<uint16_t>(
-    {
-        37, 38, 39,
-        40, 41, 42,
-        43, 44, 45,
-        46, 47, 48,
-        49, 50, 51,
-        52, 53, 54,
-    }));
-
-    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
-    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
-
-    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
-    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
-
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
-            subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo1);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
-            subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo2);
-
-
-    armnn::ConcatQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_ViewOrigins.push_back(window1);
-    data.m_ViewOrigins.push_back(window2);
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
-
-    inputHandle1->Allocate();
-    inputHandle2->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
-
-    return ret;
-}
-
-LayerTestResult<float, 4> BatchNormTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // BatchSize: 1
-    // Channels: 2
-    // Height: 3
-    // Width: 2
-
-    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-         1.f, 4.f,
-         4.f, 2.f,
-         1.f, 6.f,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-         1.f, 1.f,
-         4.f, 1.f,
-        -2.f, 4.f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-        1.f, 4.f,
-        4.f, 2.f,
-        1.f, 6.f,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-        3.f, 3.f,
-        4.f, 3.f,
-        2.f, 4.f
-    };
-
-    return BatchNormTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager,
-        inputOutputShape, inputValues, expectedOutputValues,
-        0.f, 0, armnn::DataLayout::NCHW);
-}
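The reference outputs follow the usual per-channel batch-normalization transform; the mean, variance, beta and gamma constants used by these tests live inside BatchNormTestImpl and are not shown in this file. A minimal sketch of the transform (parameter values below are illustrative only):

    #include <cmath>

    // out = gamma * (x - mean) / sqrt(variance + eps) + beta, applied per channel.
    float BatchNorm(float x, float mean, float variance, float beta, float gamma, float eps)
    {
        return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
    }

    int main()
    {
        // Identity parameters leave the input unchanged, which is consistent
        // with the Channel 0 expectations above equalling their inputs.
        return BatchNorm(4.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f) == 4.0f ? 0 : 1;
    }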
-
-LayerTestResult<float, 4> BatchNormNhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // BatchSize: 1
-    // Height: 3
-    // Width: 2
-    // Channels: 2
-
-    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
-    std::vector<float> inputValues
-    {
-        // Batch 0, Height 0, Width (2) x Channel (2)
-        1.f,  1.f,
-        4.f,  1.f,
-
-        // Batch 0, Height 1, Width (2) x Channel (2)
-        4.f,  4.f,
-        2.f,  1.f,
-
-        // Batch 0, Height 2, Width (2) x Channel (2)
-        1.f, -2.f,
-        6.f,  4.f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Height 0, Width (2) x Channel (2)
-        1.f, 3.f,
-        4.f, 3.f,
-
-        // Batch 0, Height 1, Width (2) x Channel (2)
-        4.f, 4.f,
-        2.f, 3.f,
-
-        // Batch 0, Height 2, Width (2) x Channel (2)
-        1.f, 2.f,
-        6.f, 4.f
-    };
-
-    return BatchNormTestImpl<armnn::DataType::Float32>(
-        workloadFactory, memoryManager,
-        inputOutputShape, inputValues, expectedOutputValues,
-        0.f, 0, armnn::DataLayout::NHWC);
-}
-
-LayerTestResult<uint8_t, 4> BatchNormUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // BatchSize: 1
-    // Channels: 2
-    // Height: 3
-    // Width: 2
-
-    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-         1.f, 4.f,
-         4.f, 2.f,
-         1.f, 6.f,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-         1.f, 1.f,
-         4.f, 1.f,
-        -2.f, 4.f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-        1.f, 4.f,
-        4.f, 2.f,
-        1.f, 6.f,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-        3.f, 3.f,
-        4.f, 3.f,
-        2.f, 4.f
-    };
-
-    return BatchNormTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager,
-        inputOutputShape, inputValues, expectedOutputValues,
-        1.f/20.f, 50, armnn::DataLayout::NCHW);
-}
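-
-// Note (illustrative): with the usual affine quantisation q = round(r / scale) + offset,
-// the scale of 1/20 and offset of 50 used here map the float values above as, e.g.,
-// 1.0f -> 70, 6.0f -> 170 and -2.0f -> 10, all comfortably inside the uint8 range.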
-
-LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // BatchSize: 1
-    // Height: 3
-    // Width: 2
-    // Channels: 2
-
-    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
-    std::vector<float> inputValues
-    {
-        // Batch 0, Height 0, Width (2) x Channel (2)
-        1.f,  1.f,
-        4.f,  1.f,
-
-        // Batch 0, Height 1, Width (2) x Channel (2)
-        4.f,  4.f,
-        2.f,  1.f,
-
-        // Batch 0, Height 2, Width (2) x Channel (2)
-        1.f, -2.f,
-        6.f,  4.f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Height 0, Width (2) x Channel (2)
-        1.f, 3.f,
-        4.f, 3.f,
-
-        // Batch 0, Height 1, Width (2) x Channel (2)
-        4.f, 4.f,
-        2.f, 3.f,
-
-        // Batch 0, Height 2, Width (2) x Channel (2)
-        1.f, 2.f,
-        6.f, 4.f
-    };
-
-    return BatchNormTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager,
-        inputOutputShape, inputValues, expectedOutputValues,
-        1.f/20.f, 50, armnn::DataLayout::NHWC);
-}
-
-LayerTestResult<int16_t, 4> BatchNormInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // BatchSize: 1
-    // Channels: 2
-    // Height: 3
-    // Width: 2
-
-    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
-    std::vector<float> inputValues
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-         1.f, 4.f,
-         4.f, 2.f,
-         1.f, 6.f,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-         1.f, 1.f,
-         4.f, 1.f,
-        -2.f, 4.f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Channel 0, Height (3) x Width (2)
-        1.f, 4.f,
-        4.f, 2.f,
-        1.f, 6.f,
-
-        // Batch 0, Channel 1, Height (3) x Width (2)
-        3.f, 3.f,
-        4.f, 3.f,
-        2.f, 4.f
-    };
-
-    return BatchNormTestImpl<armnn::DataType::QuantisedSymm16>(
-        workloadFactory, memoryManager,
-        inputOutputShape, inputValues, expectedOutputValues,
-        1.f/20.f, 50, armnn::DataLayout::NCHW);
-}
-
-LayerTestResult<int16_t, 4> BatchNormInt16NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // BatchSize: 1
-    // Height: 3
-    // Width: 2
-    // Channels: 2
-
-    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
-    std::vector<float> inputValues
-    {
-        // Batch 0, Height 0, Width (2) x Channel (2)
-        1.f,  1.f,
-        4.f,  1.f,
-
-        // Batch 0, Height 1, Width (2) x Channel (2)
-        4.f,  4.f,
-        2.f,  1.f,
-
-        // Batch 0, Height 2, Width (2) x Channel (2)
-        1.f, -2.f,
-        6.f,  4.f
-    };
-    std::vector<float> expectedOutputValues
-    {
-        // Batch 0, Height 0, Width (2) x Channel (2)
-        1.f, 3.f,
-        4.f, 3.f,
-
-        // Batch 0, Height 1, Width (2) x Channel (2)
-        4.f, 4.f,
-        2.f, 3.f,
-
-        // Batch 0, Height 2, Width (2) x Channel (2)
-        1.f, 2.f,
-        6.f, 4.f
-    };
-
-    return BatchNormTestImpl<armnn::DataType::QuantisedSymm16>(
-        workloadFactory, memoryManager,
-        inputOutputShape, inputValues, expectedOutputValues,
-        1.f/20.f, 50, armnn::DataLayout::NHWC);
-}
-
-LayerTestResult<uint8_t, 4> ConstantUint8CustomQuantizationScaleAndOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return ConstantTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 2e-6f, 1);
-}
-
-LayerTestResult<int16_t, 4> ConstantInt16CustomQuantizationScaleAndOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return ConstantTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 2e-6f, 1);
-}
-
-LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation1dTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim0DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation2dDim1DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation3dDim2TestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, useSubtensor, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim0DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation3dDim1DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation3dDim2DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, useSubtensor, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDim2TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, bool useSubtensor)
-{
-    return Concatenation4dDim3TestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1, useSubtensor);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDiffShapeDim0TestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDiffShapeDim1TestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Concatenation4dDiffShapeDim2TestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor)
-{
-    return Concatenation4dDiffShapeDim3TestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1, useSubtensor);
-}
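-
-// Note: all of the quantised concat tests above pass the same scale (0.5f) and
-// offset (-1) for inputs and output, so, assuming the implementation propagates that
-// quantisation info unchanged, concatenation reduces to a straight copy of the raw
-// uint8 values with no requantisation step.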
-
-LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, forceNoPadding);
-}
-
-LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, forceNoPadding, 3.0f, -5);
-}
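-
-// Note: max pooling only compares values, and q = round(r / scale) + offset is
-// monotonic (for positive scale), so the quantised result is just the float result
-// pushed through the same mapping; the particular scale (3.0f) and offset (-5) do
-// not affect which element wins.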
-
-LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager, forceNoPadding);
-}
-
-LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, forceNoPadding);
-}
-
-LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, forceNoPadding, 0.1f, 128);
-}
-
-LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager, forceNoPadding);
-}
-
-LayerTestResult<float, 4> SimpleMaxPooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleMaxPooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<uint8_t, 4> SimpleMaxPooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<int16_t, 4> SimpleMaxPooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
-            workloadFactory, memoryManager, 1.0f, -5);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleMaxPooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(
-            workloadFactory, memoryManager, 1.0f, -5);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingMaxPooling2dSize3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SimpleAveragePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, dataLayout, 0.5f, -1);
-}
-
-LayerTestResult<int16_t, 4> SimpleAveragePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding)
-{
-    return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, forceNoPadding);
-}
-
-LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, 0.5f, -1);
-}
-
-LayerTestResult<int16_t, 4> LargeTensorsAveragePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::QuantisedAsymm8>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingAveragePooling2dSize3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SimpleL2Pooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleL2Pooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, dataLayout);
-}
-
-LayerTestResult<int16_t, 4> SimpleL2Pooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout)
-{
-    return SimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, dataLayout);
-}
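-
-// Reminder: L2 pooling computes, per window, y = sqrt(sum(x_i^2) / n), i.e. the
-// root-mean-square of the window rather than its maximum or plain average.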
-
-LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride1Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride4Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> L2Pooling2dSize7Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize7TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize7TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize7Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize7TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> L2Pooling2dSize9Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize9TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize9TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize9Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return L2Pooling2dSize9TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleL2Pooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> IgnorePaddingL2Pooling2dSize3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> AsymmetricNonSquarePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> ComparePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::PoolingAlgorithm  poolingType)
-{
-    return ComparePooling2dTestCommon<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, refWorkloadFactory, poolingType);
-}
-
-LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::PoolingAlgorithm  poolingType)
-{
-    return ComparePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager, refWorkloadFactory, poolingType, 0.1f, 128);
-}
-
-LayerTestResult<int16_t, 4> ComparePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::PoolingAlgorithm  poolingType)
-{
-    return ComparePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager, refWorkloadFactory, poolingType);
-}
-
-LayerTestResult<float, 2> FullyConnectedLargeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool transposeWeights)
-{
-    return FullyConnectedLargeTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, transposeWeights);
-}
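-
-// Note: transposeWeights presumably selects which of the two supported weight-matrix
-// layouts is fed to the workload; both settings are expected to yield identical
-// results, which is what running the test with the flag on and off exercises.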
-
-LayerTestResult<float, 4> SpaceToBatchNdSimpleFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdSimpleTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiChannelsTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiBlockFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiBlockTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToBatchNdPaddingFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdPaddingTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiChannelsTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiBlockTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdPaddingTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
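-
-// Illustrative shape example for SpaceToBatchNd (numbers made up for exposition): an
-// NCHW input of shape [1, 1, 4, 4] with block shape [2, 2] and no padding becomes
-// [4, 1, 2, 2]: each 2x2 spatial block moves out into the batch dimension. The
-// Padding variants pad H and W first so that they divide evenly by the block shape.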
-
-LayerTestResult<float, 4> SpaceToBatchNdSimpleNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdSimpleNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiChannelsNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiBlockNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiBlockNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToBatchNdPaddingNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdSimpleNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiChannelsNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiBlockNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdSimpleTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiChannelsTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiBlockTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdPaddingTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdSimpleNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiChannelsNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdMultiBlockNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToDepthNHWCAsymmQ8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest1<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory,
-        memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> SpaceToDepthNCHWAsymmQ8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest1<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory,
-        memoryManager,
-        armnn::DataLayout::NCHW);
-}
-
-LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test1(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest1<armnn::DataType::Float32>(
-        workloadFactory,
-        memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test1(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest1<armnn::DataType::Float32>(
-        workloadFactory,
-        memoryManager,
-        armnn::DataLayout::NCHW);
-}
-
-LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test2(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest2<armnn::DataType::Float32>(
-        workloadFactory,
-        memoryManager);
-}
-
-LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test2(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest2<armnn::DataType::Float32>(
-        workloadFactory,
-        memoryManager,
-        armnn::DataLayout::NCHW);
-}
-
-LayerTestResult<int16_t, 4> SpaceToDepthNHWCQSymm16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest2<armnn::DataType::QuantisedSymm16>(
-        workloadFactory,
-        memoryManager);
-}
-
-LayerTestResult<int16_t, 4> SpaceToDepthNCHWQSymm16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return SpaceToDepthSimpleTest2<armnn::DataType::QuantisedSymm16>(
-        workloadFactory,
-        memoryManager,
-        armnn::DataLayout::NCHW);
-}
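-
-// Illustrative shape example for SpaceToDepth (numbers made up for exposition): an
-// NHWC input of shape [1, 2, 2, 1] with block size 2 becomes [1, 1, 1, 4]; spatial
-// blocks are folded into the channel dimension, preserving the element count.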
-
-namespace {
-
-} // anonymous namespace
-
-LayerTestResult<float, 4> StridedSlice4DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice4DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> StridedSlice4DReverseFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice4DReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceSimpleStrideTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceSimpleRangeMaskTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceShrinkAxisMaskTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 3> StridedSlice3DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice3DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 3> StridedSlice3DReverseFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice3DReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 2> StridedSlice2DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice2DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 2> StridedSlice2DReverseFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice2DReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> StridedSlice4DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice4DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> StridedSlice4DReverseUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice4DReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceSimpleStrideTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceSimpleRangeMaskTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceShrinkAxisMaskTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 3> StridedSlice3DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice3DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 3> StridedSlice3DReverseUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice3DReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 2> StridedSlice2DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice2DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 2> StridedSlice2DReverseUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice2DReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> StridedSlice4DInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice4DTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> StridedSlice4DReverseInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice4DReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> StridedSliceSimpleStrideInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceSimpleStrideTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> StridedSliceSimpleRangeMaskInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceSimpleRangeMaskTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 2> StridedSliceShrinkAxisMaskInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSliceShrinkAxisMaskTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 3> StridedSlice3DInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice3DTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 3> StridedSlice3DReverseInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice3DReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 2> StridedSlice2DInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice2DTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 2> StridedSlice2DReverseInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return StridedSlice2DReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
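-
-// Note: the ShrinkAxisMask variants return rank-2 results because each bit set in
-// the shrink-axis mask removes the corresponding sliced dimension from the output,
-// whereas the plain 4D/3D/2D tests keep the rank of their input.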
-
-LayerTestResult<float, 4> Debug4DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug4DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 3> Debug3DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug3DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 2> Debug2DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug2DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 1> Debug1DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug1DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> Debug4DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug4DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 3> Debug3DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug3DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 2> Debug2DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug2DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 1> Debug1DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Debug1DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
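-
-// Note: the Debug layer is functionally an identity, forwarding its input unchanged
-// while emitting the tensor contents for inspection, so these tests only need to
-// vary tensor rank (1D to 4D) and data type.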
-
-LayerTestResult<float, 1> Gather1DParamsFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Gather1DParamsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 1> Gather1DParamsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Gather1DParamsTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 1> Gather1DParamsInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return Gather1DParamsTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 2> GatherMultiDimParamsFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return GatherMultiDimParamsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 2> GatherMultiDimParamsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return GatherMultiDimParamsTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 2> GatherMultiDimParamsInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return GatherMultiDimParamsTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> GatherMultiDimParamsMultiDimIndicesFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> GatherMultiDimParamsMultiDimIndicesUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedAsymm8>(
-        workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> GatherMultiDimParamsMultiDimIndicesInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedSymm16>(
-            workloadFactory, memoryManager);
-}
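-
-// Illustrative Gather example (numbers made up for exposition): with params
-// [10, 20, 30, 40] and indices [3, 0] the output is [40, 10]; indices select slices
-// of params along dimension 0, so the output shape is the indices shape followed by
-// the remaining params dimensions.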
-
-LayerTestResult<float, 4> DequantizeSimpleUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return DequantizeSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> DequantizeOffsetUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return DequantizeOffsetTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<float, 4> DequantizeSimpleInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return DequantizeSimpleTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> QuantizeSimpleUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return QuantizeSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return QuantizeClampTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
-}
-
-LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return QuantizeClampTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
-}
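The Quantize/Dequantize wrappers above exercise the usual affine scheme, real = scale * (quantized - offset), with the QuantizeClamp* variants pinning down saturation at the type bounds. A minimal standalone sketch of that arithmetic (illustrative only, not the ArmNN encoders/decoders):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Affine quantization: q = clamp(round(x / scale) + offset, qMin, qMax).
std::uint8_t QuantizeU8(float x, float scale, std::int32_t offset)
{
    const std::int32_t q = static_cast<std::int32_t>(std::round(x / scale)) + offset;
    return static_cast<std::uint8_t>(std::clamp(q, 0, 255));
}

// Dequantization: x = scale * (q - offset).
float DequantizeU8(std::uint8_t q, float scale, std::int32_t offset)
{
    return scale * (static_cast<std::int32_t>(q) - offset);
}

int main()
{
    const float scale = 0.5f;
    const std::int32_t offset = 1;
    const std::uint8_t q = QuantizeU8(63.5f, scale, offset); // 128
    std::cout << static_cast<int>(q) << " -> "
              << DequantizeU8(q, scale, offset) << '\n';     // 128 -> 63.5
}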
index bb21202..a4c09a6 100644 (file)
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
-
-#include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
 
-#include <backendsCommon/test/layerTests/LayerTestResult.hpp>
+#pragma once
 
+#include <backendsCommon/test/layerTests/ActivationTestImpl.hpp>
 #include <backendsCommon/test/layerTests/AdditionTestImpl.hpp>
+#include <backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp>
+#include <backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ConcatTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ConvertFp16ToFp32TestImpl.hpp>
+#include <backendsCommon/test/layerTests/ConvertFp32ToFp16TestImpl.hpp>
+#include <backendsCommon/test/layerTests/Conv2dTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ConstantTestImpl.hpp>
+#include <backendsCommon/test/layerTests/DebugTestImpl.hpp>
+#include <backendsCommon/test/layerTests/DequantizeTestImpl.hpp>
+#include <backendsCommon/test/layerTests/DetectionPostProcessTestImpl.hpp>
 #include <backendsCommon/test/layerTests/DivisionTestImpl.hpp>
 #include <backendsCommon/test/layerTests/EqualTestImpl.hpp>
+#include <backendsCommon/test/layerTests/FakeQuantizationTestImpl.hpp>
+#include <backendsCommon/test/layerTests/FloorTestImpl.hpp>
+#include <backendsCommon/test/layerTests/FullyConnectedTestImpl.hpp>
+#include <backendsCommon/test/layerTests/GatherTestImpl.hpp>
 #include <backendsCommon/test/layerTests/GreaterTestImpl.hpp>
+#include <backendsCommon/test/layerTests/L2NormalizationTestImpl.hpp>
+#include <backendsCommon/test/layerTests/LstmTestImpl.hpp>
 #include <backendsCommon/test/layerTests/MaximumTestImpl.hpp>
+#include <backendsCommon/test/layerTests/MeanTestImpl.hpp>
 #include <backendsCommon/test/layerTests/MinimumTestImpl.hpp>
 #include <backendsCommon/test/layerTests/MultiplicationTestImpl.hpp>
+#include <backendsCommon/test/layerTests/NormalizationTestImpl.hpp>
+#include <backendsCommon/test/layerTests/PadTestImpl.hpp>
+#include <backendsCommon/test/layerTests/PermuteTestImpl.hpp>
+#include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp>
+#include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
+#include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
+#include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp>
+#include <backendsCommon/test/layerTests/SoftmaxTestImpl.hpp>
+#include <backendsCommon/test/layerTests/SpaceToBatchNdTestImpl.hpp>
+#include <backendsCommon/test/layerTests/SpaceToDepthTestImpl.hpp>
+#include <backendsCommon/test/layerTests/SplitterTestImpl.hpp>
+#include <backendsCommon/test/layerTests/StackTestImpl.hpp>
+#include <backendsCommon/test/layerTests/StridedSliceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SubtractionTestImpl.hpp>
-
-#include <Half.hpp>
-#include "TensorCopyUtils.hpp"
-#include "WorkloadTestUtils.hpp"
-#include "TensorUtils.hpp"
-#include "Permute.hpp"
-
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/IMemoryManager.hpp>
-#include <reference/workloads/Decoders.hpp>
-#include <reference/workloads/Encoders.hpp>
-#include <test/TensorHelpers.hpp>
-
-#include <boost/multi_array.hpp>
-#include <boost/assert.hpp>
-
-#include <array>
-
-// Layer callables.
-
-namespace armnn
-{
-class IWorkloadFactory;
-}
-
-LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled);
-
-LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::DataLayout layout);
-
-LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::DataLayout layout);
-
-LayerTestResult<float,   4> Convolution1dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled);
-
-LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled);
-
-LayerTestResult<float, 4> DepthwiseConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
-    armnn::IWorkloadFactory &workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled);
-
-LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    const armnn::DataLayout layout);
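The dilation and padding variants above are sized with the standard convolution output formula; a standalone sketch (the formula is conventional, not ArmNN-specific code):

#include <iostream>

// Output spatial size of a convolution:
// out = (in + padBefore + padAfter - dilation * (kernel - 1) - 1) / stride + 1
int ConvOutputSize(int in, int kernel, int stride,
                   int padBefore, int padAfter, int dilation)
{
    return (in + padBefore + padAfter - dilation * (kernel - 1) - 1) / stride + 1;
}

int main()
{
    // A 3x3 kernel with dilation 3 spans 7 input pixels, so a 10-wide input
    // with no padding and stride 1 gives a 4-wide output.
    std::cout << ConvOutputSize(10, 3, 1, 0, 0, 3) << '\n'; // 4
}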
-
-LayerTestResult<float,   4> SimpleMaxPooling2dSize2x2Stride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<float,   4> SimpleMaxPooling2dSize3x3Stride2x4Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<float,   4> SimpleMaxPooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<uint8_t, 4> SimpleMaxPooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<int16_t, 4> SimpleMaxPooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<float,   4> IgnorePaddingSimpleMaxPooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleMaxPooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> IgnorePaddingMaxPooling2dSize3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingMaxPooling2dSize3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> SimpleAveragePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<int16_t, 4> SimpleAveragePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<float,   4> LargeTensorsAveragePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> LargeTensorsAveragePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool forceNoPadding);
-
-LayerTestResult<float,   4> IgnorePaddingSimpleAveragePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> IgnorePaddingAveragePooling2dSize3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingAveragePooling2dSize3Int16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> SimpleL2Pooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<int16_t, 4> SimpleL2Pooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout dataLayout);
-
-LayerTestResult<float,   4> L2Pooling2dSize3Stride1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride1Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> L2Pooling2dSize3Stride3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> L2Pooling2dSize3Stride4Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride4Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> L2Pooling2dSize7Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize7Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> L2Pooling2dSize9Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> L2Pooling2dSize9Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> IgnorePaddingSimpleL2Pooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingSimpleL2Pooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> IgnorePaddingL2Pooling2dSize3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> IgnorePaddingL2Pooling2dSize3Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,   4> AsymmetricNonSquarePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> AsymmetricNonSquarePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> ComparePooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::PoolingAlgorithm  poolingType);
-
-LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::PoolingAlgorithm  poolingType);
-
-LayerTestResult<int16_t, 4> ComparePooling2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::PoolingAlgorithm  poolingType);
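The pooling declarations above cover max, average, and L2 pooling over sliding windows. A standalone sketch of the simplest case, 2x2 max pooling with stride 2 (plain C++, not the ArmNN kernel):

#include <algorithm>
#include <iostream>
#include <vector>

// 2x2 max pooling with stride 2 over a single-channel [h x w] image
// (no padding; h and w assumed even for brevity).
std::vector<float> MaxPool2x2(const std::vector<float>& in, int h, int w)
{
    std::vector<float> out((h / 2) * (w / 2));
    for (int y = 0; y < h / 2; ++y)
    {
        for (int x = 0; x < w / 2; ++x)
        {
            const int r = 2 * y, c = 2 * x;
            out[y * (w / 2) + x] = std::max({in[r * w + c],       in[r * w + c + 1],
                                             in[(r + 1) * w + c], in[(r + 1) * w + c + 1]});
        }
    }
    return out;
}

int main()
{
    // 4x4 input -> 2x2 output of per-window maxima.
    const std::vector<float> in = { 1,  2,  3,  4,
                                    5,  6,  7,  8,
                                    9, 10, 11, 12,
                                   13, 14, 15, 16};
    for (float v : MaxPool2x2(in, 4, 4)) { std::cout << v << ' '; } // 6 8 14 16
    std::cout << '\n';
}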
-
-LayerTestResult<float, 4> ConstantLinearActivationTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SimpleNormalizationAcrossTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SimpleNormalizationWithinTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> SimpleSoftmaxTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta);
-
-LayerTestResult<float, 2> SimpleAxisSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis);
-
-LayerTestResult<float, 3> Simple3dSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
-
-LayerTestResult<float, 3> Simple3dAxisSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis);
-
-LayerTestResult<float, 4> Simple4dSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
-
-LayerTestResult<float, 4> Simple4dAxisSoftmaxTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis);
-
-LayerTestResult<uint8_t, 2> SimpleSoftmaxUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta);
-
-LayerTestResult<uint8_t,3> Simple3dSoftmaxUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
-
-LayerTestResult<uint8_t,4> Simple4dSoftmaxUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
-
-LayerTestResult<int16_t,2> SimpleSoftmaxUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
-
-LayerTestResult<int16_t,3> Simple3dSoftmaxUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
-
-LayerTestResult<int16_t,4> Simple4dSoftmaxUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta);
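All Softmax variants above take a `beta` scaling factor: softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j) along the chosen axis. A standalone sketch using the standard subtract-the-max stabilisation (illustrative, not the ArmNN kernel):

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Softmax with a beta scaling factor, computed over one axis.
std::vector<float> Softmax(const std::vector<float>& x, float beta)
{
    const float maxVal = *std::max_element(x.begin(), x.end());
    std::vector<float> out(x.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i)
    {
        out[i] = std::exp(beta * (x[i] - maxVal)); // shift by max for stability
        sum += out[i];
    }
    for (float& v : out) { v /= sum; }
    return out;
}

int main()
{
    for (float v : Softmax({1.0f, 2.0f, 3.0f}, /*beta=*/1.0f))
    {
        std::cout << v << ' '; // ~0.090 0.245 0.665
    }
    std::cout << '\n';
}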
-
-LayerTestResult<float, 4> SimpleSigmoidTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleReshapeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 5> Reshape5dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SimpleFloorTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 1> Concatenation1dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> Concatenation2dDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> Concatenation2dDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> Concatenation3dDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> Concatenation3dDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> Concatenation3dDim2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<float, 4> Concatenation4dDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Concatenation4dDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Concatenation4dDim2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Concatenation4dDim3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim0Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Concatenation4dDiffShapeDim3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDim3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SimpleSigmoidInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> CompareConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory);
-
-template<typename T>
-LayerTestResult<T, 4> CompareDepthwiseConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> CompareNormalizationTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::NormalizationAlgorithmChannel normChannel,
-    armnn::NormalizationAlgorithmMethod normMethod);
-
-LayerTestResult<float, 2> CompareSoftmaxTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    float beta);
-
-LayerTestResult<float, 2> FullyConnectedFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    bool transposeWeights);
-
-std::vector<LayerTestResult<float, 3>> SplitterTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> CopyViaSplitterTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> ConcatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> CompareActivationTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::ActivationFunction f,
-    unsigned int batchSize);
-
-LayerTestResult<float, 4> BatchNormTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchNormNhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> CompareBatchNormTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory);
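The BatchNorm tests above exercise inference-mode batch normalisation, out = gamma * (x - mean) / sqrt(variance + epsilon) + beta, applied with per-channel statistics. A standalone single-value sketch (names chosen here for illustration):

#include <cmath>
#include <iostream>

// Inference-mode batch normalisation for one value of one channel.
float BatchNorm(float x, float mean, float variance,
                float gamma, float beta, float eps)
{
    return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
}

int main()
{
    // x=5 in a channel with mean 3, variance 4, gamma 2, beta 1:
    // 2 * (5 - 3) / sqrt(4) + 1 = 3
    std::cout << BatchNorm(5.0f, 3.0f, 4.0f, 2.0f, 1.0f, 0.0f) << '\n';
}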
-
-LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> CompareBoundedReLuTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    float upperBound,
-    float lowerBound);
-
-LayerTestResult<float, 4> ReLuTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> ReLuUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> ReLuInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> BoundedReLuInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SoftReLuTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SoftReLuUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SoftReLuInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> LeakyReLuTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> LeakyReLuUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> LeakyReLuInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> AbsTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> AbsUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> AbsInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SqrtTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SqrtUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SqrtInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SquareTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SquareUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SquareInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> TanhTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> TanhUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> TanhInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
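The activation declarations above (BoundedReLu through Tanh) are all pointwise; BoundedReLu in particular clamps between a lower and an upper bound, with plain ReLU as the lower = 0, unbounded-above special case. A standalone sketch:

#include <algorithm>
#include <iostream>

// Bounded ReLU: clamp the input between a lower and an upper bound.
float BoundedReLu(float x, float lower, float upper)
{
    return std::clamp(x, lower, upper);
}

int main()
{
    std::cout << BoundedReLu(-2.0f, -1.0f, 6.0f) << ' '   // -1  (clamped below)
              << BoundedReLu( 3.5f, -1.0f, 6.0f) << ' '   // 3.5 (in range)
              << BoundedReLu( 9.0f, -1.0f, 6.0f) << '\n'; // 6   (clamped above)
}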
-
-// Tests that the output should be identical to the input when the output dimensions match the input ones.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeBilinearNopTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleResizeBilinearTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the resize bilinear for minification of a square input matrix (also: input dimensions are a
-// multiple of output dimensions).
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeBilinearSqMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the resize bilinear for minification (output dimensions smaller than input dimensions).
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeBilinearMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the resize bilinear for magnification (output dimensions bigger than input dimensions).
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeBilinearMagTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
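Bilinear resize computes each output pixel as a weighted average of the four nearest input pixels. A standalone sketch using the corner-projection convention the comments above describe (the authoritative convention lives in the impls; this is illustrative):

#include <algorithm>
#include <iostream>
#include <vector>

// Bilinear resize of a single-channel [inH x inW] image to [outH x outW],
// projecting output texel origins into the input (scale = in / out).
std::vector<float> ResizeBilinear(const std::vector<float>& in,
                                  int inH, int inW, int outH, int outW)
{
    std::vector<float> out(outH * outW);
    const float scaleY = static_cast<float>(inH) / outH;
    const float scaleX = static_cast<float>(inW) / outW;
    for (int y = 0; y < outH; ++y)
    {
        for (int x = 0; x < outW; ++x)
        {
            const float fy = y * scaleY;
            const float fx = x * scaleX;
            const int y0 = static_cast<int>(fy), y1 = std::min(y0 + 1, inH - 1);
            const int x0 = static_cast<int>(fx), x1 = std::min(x0 + 1, inW - 1);
            const float wy = fy - y0, wx = fx - x0;
            const float top = in[y0 * inW + x0] * (1 - wx) + in[y0 * inW + x1] * wx;
            const float bot = in[y1 * inW + x0] * (1 - wx) + in[y1 * inW + x1] * wx;
            out[y * outW + x] = top * (1 - wy) + bot * wy;
        }
    }
    return out;
}

int main()
{
    // 2x2 -> 1x1: the single output texel projects onto the input origin,
    // so the result is the top-left input value (cf. SimpleResizeBilinearTest).
    const std::vector<float> in = {1.0f, 255.0f, 200.0f, 250.0f};
    std::cout << ResizeBilinear(in, 2, 2, 1, 1)[0] << '\n'; // 1
}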
-
-// Tests that the output should be identical to the input when the output dimensions match the input ones.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeNearestNeighborNopTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the behaviour of the resize NearestNeighbor operation when rescaling a 2x2 image into a 1x1 image.
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleResizeNearestNeighborTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the resize NearestNeighbor for minification of a square input matrix (also: input dimensions are a
-// multiple of output dimensions).
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeNearestNeighborSqMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the resize NearestNeighbor for minification (output dimensions smaller than input dimensions).
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeNearestNeighborMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout);
-
-// Tests the resize NearestNeighbor for magnification (output dimensions bigger than input dimensions).
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> ResizeNearestNeighborMagTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout,
-        float inQuantScale,
-        int32_t inQuantOffset,
-        float outQuantScale,
-        int32_t outQuantOffset);
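Nearest-neighbour resize instead copies, for each output pixel, the value of the nearest projected input pixel; the exact rounding convention (and the quantisation parameters of the Mag variant above) are impl-defined. A standalone sketch using the simplest truncating projection:

#include <iostream>
#include <vector>

// Nearest-neighbour resize of a single-channel [inH x inW] image.
std::vector<float> ResizeNearest(const std::vector<float>& in,
                                 int inH, int inW, int outH, int outW)
{
    std::vector<float> out(outH * outW);
    for (int y = 0; y < outH; ++y)
    {
        for (int x = 0; x < outW; ++x)
        {
            const int sy = y * inH / outH; // truncating projection
            const int sx = x * inW / outW;
            out[y * outW + x] = in[sy * inW + sx];
        }
    }
    return out;
}

int main()
{
    // Magnify 1x2 -> 1x4: each input pixel is repeated twice.
    for (float v : ResizeNearest({1, 2}, 1, 2, 1, 4)) { std::cout << v << ' '; }
    std::cout << '\n'; // 1 1 2 2
}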
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Rsqrt2dTestCommon(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::TensorInfo inputTensorInfo,
-        const armnn::TensorInfo outputTensorInfo,
-        const std::vector<float>& inputValues,
-        const std::vector<float>& expectedOutputValues);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Rsqrt2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Rsqrt3dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> RsqrtZeroTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> RsqrtNegativeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
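Rsqrt computes 1 / sqrt(x), so under IEEE-754 semantics the Zero and Negative variants above should observe +infinity and NaN respectively. A standalone sketch:

#include <cmath>
#include <iostream>

// Reciprocal square root: 1 / sqrt(x).
float Rsqrt(float x)
{
    return 1.0f / std::sqrt(x);
}

int main()
{
    std::cout << Rsqrt(4.0f)  << '\n'; // 0.5
    std::cout << Rsqrt(0.0f)  << '\n'; // inf (division by +0)
    std::cout << Rsqrt(-1.0f) << '\n'; // nan (sqrt of a negative)
}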
-
-LayerTestResult<float, 2> FakeQuantizationTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> L2NormalizationDefaultEpsilonTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> L2NormalizationNonDefaultEpsilonTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> L2Normalization1dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> L2Normalization1dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> L2Normalization1dUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> L2Normalization2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> L2Normalization2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> L2Normalization2dUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout);
-
-LayerTestResult<float, 2> L2Normalization2dShapeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> L2Normalization3dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> L2Normalization3dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> L2Normalization3dUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout);
-
-LayerTestResult<float, 4> L2Normalization4dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> L2Normalization4dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> L2Normalization4dUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout layout);
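The L2Normalization variants above normalise along the channel axis, dividing each value by the L2 norm of its channel vector, with an epsilon floor (what the Default/NonDefaultEpsilon tests vary) guarding tiny norms; the exact placement of epsilon is impl-defined. A standalone per-position sketch:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// L2-normalise the channel values of one spatial position:
// out_c = x_c / sqrt(max(sum_k x_k^2, eps)) — one common epsilon placement.
std::vector<float> L2Normalize(const std::vector<float>& x, float eps)
{
    float sumSq = 0.0f;
    for (float v : x) { sumSq += v * v; }
    const float norm = std::sqrt(std::max(sumSq, eps));
    std::vector<float> out(x.size());
    for (std::size_t c = 0; c < x.size(); ++c) { out[c] = x[c] / norm; }
    return out;
}

int main()
{
    for (float v : L2Normalize({3.0f, 4.0f}, 1e-12f))
    {
        std::cout << v << ' '; // 0.6 0.8
    }
    std::cout << '\n';
}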
-
-LayerTestResult<float, 4> ConstantTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> ConstantUint8SimpleQuantizationScaleNoOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> ConstantInt16SimpleQuantizationScaleNoOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float upperBound);
-
-LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float upperBound,
-    float lowerBound);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> FullyConnectedTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled);
-
-std::vector<LayerTestResult<uint8_t, 3>> SplitterUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-std::vector<LayerTestResult<int16_t, 3>> SplitterInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> ConcatUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint16_t, 3> ConcatUint16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> CompareActivationUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    armnn::ActivationFunction f);
-
-LayerTestResult<int16_t, 4> CompareActivationInt16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        armnn::IWorkloadFactory& refWorkloadFactory,
-        armnn::ActivationFunction f);
-
-LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::IWorkloadFactory& refWorkloadFactory,
-    float beta);
-
-LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled,
-        const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool biasEnabled,
-    const armnn::DataLayout layout);
-
-LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> ConstantLinearActivationInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchNormUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> BatchNormInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> BatchNormInt16NhwcTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> ConstantUint8CustomQuantizationScaleAndOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> ConstantInt16CustomQuantizationScaleAndOffsetTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool useSubtensor);
-
-LayerTestResult<float, 2> FullyConnectedLargeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    bool transposeWeights);
-
-LayerTestResult<uint8_t, 2> PadUint82dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> PadUint82dCustomPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> PadUint83dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PadUint84dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> PadFloat322dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> PadFloat322dCustomPaddingTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> PadFloat323dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> PadFloat324dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Pad2dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset,
-    const float customPaddingValue = 0.0f);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Pad3dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Pad4dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float qScale,
-    int32_t qOffset);
-
-void LstmUtilsZeroVectorTest();
-void LstmUtilsMeanStddevNormalizationNoneZeroInputTest();
-void LstmUtilsMeanStddevNormalizationAllZeroInputTest();
-void LstmUtilsMeanStddevNormalizationMixedZeroInputTest();
-void LstmUtilsVectorBatchVectorCwiseProductTest();
-void LstmUtilsVectorBatchVectorAddTest();
-
-LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-// QuantizedLstm
-LayerTestResult<uint8_t, 2> QuantizedLstmTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SimpleConvertFp16ToFp32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<armnn::Half, 4> SimpleConvertFp32ToFp16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> MaximumSimpleTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> MaximumBroadcast1ElementTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> MaximumBroadcast1DVectorTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> MaximumUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> MaximumBroadcast1ElementUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> MaximumBroadcast1DVectorUint8Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> MaximumInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> MaximumBroadcast1ElementInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> MaximumBroadcast1DVectorInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 1> MeanSimpleTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> MeanSimpleAxisTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> MeanKeepDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> MeanMultipleDimsTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 1> MeanVts1Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> MeanVts2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> MeanVts3Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdSimpleFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiBlockFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdPaddingFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdSimpleNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdMultiBlockNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToBatchNdPaddingNHWCFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingNHWCUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingNHWCUint16Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNhwcTest1(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNhwcTest2(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNhwcTest3(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNhwcTest4(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNchwTest1(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNchwTest2(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> BatchToSpaceNdNchwTest3(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcTest5(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcTest6(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcTest7(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwTest4(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwTest5(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwTest6(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwTest7(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> StridedSlice4DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> StridedSlice4DReverseFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> StridedSlice3DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> StridedSlice3DReverseFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> StridedSlice2DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> StridedSlice2DReverseFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> StridedSlice4DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> StridedSlice4DReverseUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> StridedSlice3DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> StridedSlice3DReverseUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> StridedSlice2DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> StridedSlice2DReverseUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> StridedSlice4DInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> StridedSlice4DReverseInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> StridedSliceSimpleStrideInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> StridedSliceSimpleRangeMaskInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> StridedSliceShrinkAxisMaskInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 3> StridedSlice3DInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 3> StridedSlice3DReverseInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> StridedSlice2DInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> StridedSlice2DReverseInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> Debug4DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 3> Debug3DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> Debug2DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 1> Debug1DFloat32Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> Debug4DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 3> Debug3DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> Debug2DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 1> Debug1DUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 1> Gather1DParamsFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 1> Gather1DParamsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 1> Gather1DParamsInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 2> GatherMultiDimParamsFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 2> GatherMultiDimParamsUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 2> GatherMultiDimParamsInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> GatherMultiDimParamsMultiDimIndicesFloatTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> GatherMultiDimParamsMultiDimIndicesUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> GatherMultiDimParamsMultiDimIndicesInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> DequantizeSimpleUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> DequantizeOffsetUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> DequantizeSimpleInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToDepthNCHWAsymmQ8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> SpaceToDepthNHWCAsymmQ8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test1(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test1(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test2(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test2(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToDepthNHWCQSymm16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> SpaceToDepthNCHWQSymm16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> QuantizeSimpleUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-std::vector<T> ConvertToDataType(const std::vector<float>& input,
-                                 const armnn::TensorInfo& inputTensorInfo)
-{
-    std::vector<T> output(input.size());
-    auto outputTensorInfo = inputTensorInfo;
-    outputTensorInfo.SetDataType(ArmnnType);
-
-    std::unique_ptr<armnn::Encoder<float>> pOutputEncoder = armnn::MakeEncoder<float>(outputTensorInfo, output.data());
-    armnn::Encoder<float>& rOutputEncoder = *pOutputEncoder;
-
-    for (auto it = input.begin(); it != input.end(); ++it)
-    {
-        rOutputEncoder.Set(*it);
-        ++rOutputEncoder;
-    }
-    return output;
-}
-
-// Utility method to convert a single value to the correct type
-template <typename T>
-T ConvertToDataType(const float& value,
-                    const armnn::TensorInfo& tensorInfo)
-{
-    std::vector<T> output(1);
-    std::unique_ptr<armnn::Encoder<float>> pEncoder = armnn::MakeEncoder<float>(tensorInfo, output.data());
-    armnn::Encoder<float>& rEncoder = *pEncoder;
-    rEncoder.Set(value);
-    return output[0];
-}
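-
-// Usage sketch (illustrative only; the values and the 2019-era QuantisedAsymm8 enum
-// name are assumptions, not taken from this header). The helpers above run each float
-// through an armnn::Encoder<float> targeting the requested type, so quantization
-// behaves exactly as it does inside the workloads:
-//
-//     armnn::TensorInfo info({ 2 }, armnn::DataType::QuantisedAsymm8, 0.1f, 0);
-//     std::vector<uint8_t> q = ConvertToDataType<armnn::DataType::QuantisedAsymm8>(
-//         std::vector<float>{ 1.0f, 2.5f }, info); // q == { 10, 25 }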
-
-template<typename T, typename B>
-LayerTestResult<T, 2> SimpleFullyConnectedTestImpl(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        armnn::TensorInfo inputTensorInfo,
-        armnn::TensorInfo outputTensorInfo,
-        armnn::TensorInfo weightsDesc,
-        armnn::TensorInfo biasesDesc,
-        boost::multi_array<T, 2>& weights,
-        boost::multi_array<B, 1>& bias,
-        boost::multi_array<T, 4>& input,
-        bool biasEnabled,
-        bool transposeWeights)
-{
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::FullyConnectedQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    armnn::ScopedCpuTensorHandle weightsTensor(weightsDesc);
-    armnn::ScopedCpuTensorHandle biasTensor(biasesDesc);
-
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, &weights[0][0]);
-    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
-
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-    data.m_Weight = &weightsTensor;
-    data.m_Bias = &biasTensor;
-    data.m_Parameters.m_BiasEnabled = biasEnabled;
-    data.m_Parameters.m_TransposeWeightMatrix = transposeWeights;
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFullyConnected(data, info);
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> FullyConnectedTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled)
-{
-    constexpr static unsigned int inputWidth = 3u;
-    constexpr static unsigned int inputHeight = 2u;
-    constexpr static unsigned int inputChannels = 1u;
-
-    constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels;
-
-    constexpr static unsigned int outputChannels = 2u;
-
-    armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-    inputTensorInfo.SetQuantizationOffset(63);
-
-    armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(5.f);
-    outputTensorInfo.SetQuantizationOffset(biasEnabled ? -50 : 10);
-
-    armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, ArmnnType);
-    weightsDesc.SetQuantizationScale(0.2f);
-    weightsDesc.SetQuantizationOffset(93);
-
-    armnn::TensorInfo biasesDesc({ outputChannels }, GetBiasTypeFromWeightsType(weightsDesc.GetDataType()).value());
-    biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale());
-    biasesDesc.SetQuantizationOffset(0);
-
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(
-        {
-            -1.2f, 6.1f, -3.5f,
-            18.8f, -5.5f, 2.9f
-        },
-        inputTensorInfo));
-
-    auto weights = MakeTensor<T, 2>(weightsDesc, ConvertToDataType<ArmnnType>(
-        {
-            -8.4f, 20.0f, -10.4f, -8.0f, 16.4f, -11.8f,
-            23.4f, 10.4f, -14.0f, -3.8f, -11.8f, 11.4f
-        },
-        weightsDesc));
-
-    auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500});
-
-    result = SimpleFullyConnectedTestImpl<T>(
-            workloadFactory,
-            memoryManager,
-            inputTensorInfo, outputTensorInfo,
-            weightsDesc, biasesDesc,
-            weights, bias, input,
-            biasEnabled, true
-    );
-
-    if (biasEnabled)
-    {
-        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
-                                                 ConvertToDataType<ArmnnType>({80.f, 1460.f}, outputTensorInfo));
-    }
-    else
-    {
-        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
-                                                 ConvertToDataType<ArmnnType>({-107.04f, 110.f}, outputTensorInfo));
-    }
-
-    return result;
-}
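-
-// Note on the quantization parameters used above: the bias scale must equal
-// inputScale * weightScale, here 0.1f * 0.2f = 0.02f, so the raw Int32 bias
-// values { 9250, 67500 } dequantize to { 185.0f, 1350.0f } before being added
-// to the accumulated products.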
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> Rsqrt2dTestCommon(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::TensorInfo inputTensorInfo,
-        const armnn::TensorInfo outputTensorInfo,
-        const std::vector<float>& inputValues,
-        const std::vector<float>& expectedOutputValues)
-{
-    auto inputTensor = MakeTensor<T, 2>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputValues, inputTensorInfo));
-
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
-                                             ConvertToDataType<ArmnnType>(expectedOutputValues, outputTensorInfo));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::RsqrtQueueDescriptor descriptor;
-
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateRsqrt(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> Rsqrt2dTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const armnn::TensorShape inputShape{ 2, 2 };
-    const armnn::TensorShape outputShape{ 2, 2 };
-
-    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-    inputTensorInfo.SetQuantizationOffset(0);
-
-    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(0.1f);
-    outputTensorInfo.SetQuantizationOffset(0);
-
-    std::vector<float> inputValues
-    {
-        1.f, 4.f,
-        16.f, 25.f
-    };
-
-    std::vector<float> expectedOutputValues
-    {
-        1.f, 0.5f,
-        0.25f, 0.2f
-    };
-
-    return Rsqrt2dTestCommon<ArmnnType>(workloadFactory, memoryManager,
-                                inputTensorInfo, outputTensorInfo,
-                                inputValues, expectedOutputValues);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 3> Rsqrt3dTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const armnn::TensorShape inputShape{ 3, 1, 2 };
-    const armnn::TensorShape outputShape{ 3, 1, 2 };
-
-    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-    inputTensorInfo.SetQuantizationOffset(0);
-
-    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(0.1f);
-    outputTensorInfo.SetQuantizationOffset(0);
-
-    std::vector<float> inputValues
-    {
-        1.f, 4.f, 16.f,
-        25.f, 64.f, 100.f
-    };
-
-    std::vector<float> expectedOutputValues
-    {
-        1.f, 0.5f, 0.25f,
-        0.2f, 0.125f, 0.1f
-    };
-
-    auto inputTensor = MakeTensor<T, 3>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputValues, inputTensorInfo));
-
-    LayerTestResult<T, 3> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo,
-                                             ConvertToDataType<ArmnnType>(expectedOutputValues, outputTensorInfo));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::RsqrtQueueDescriptor descriptor;
-
-    armnn::WorkloadInfo info;
-
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateRsqrt(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> RsqrtZeroTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const armnn::TensorShape inputShape{ 1, 2 };
-    const armnn::TensorShape outputShape{ 1, 2 };
-
-    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-
-    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(0.1f);
-
-    std::vector<float> inputValues
-    {
-        0.f, -0.f
-    };
-
-    std::vector<float> expectedOutputValues
-    {
-        INFINITY, -INFINITY
-    };
-
-    return Rsqrt2dTestCommon<ArmnnType>(workloadFactory, memoryManager,
-                                inputTensorInfo, outputTensorInfo,
-                                inputValues, expectedOutputValues);
-}
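-
-// The +/-INFINITY expectations follow IEEE 754: sqrt(+0) == +0 and sqrt(-0) == -0,
-// so 1/sqrt(+/-0) produces an infinity that preserves the sign of the zero.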
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> RsqrtNegativeTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const armnn::TensorShape inputShape{ 1, 2 };
-    const armnn::TensorShape outputShape{ 1, 2 };
-
-    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-    inputTensorInfo.SetQuantizationOffset(0);
-
-    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(0.1f);
-    outputTensorInfo.SetQuantizationOffset(0);
-
-    std::vector<float> inputValues
-    {
-        -25.f, -16.f
-    };
-
-    std::vector<float> expectedOutputValues
-    {
-        -NAN, -NAN
-    };
-
-    return Rsqrt2dTestCommon<ArmnnType>(workloadFactory, memoryManager,
-                                inputTensorInfo, outputTensorInfo,
-                                inputValues, expectedOutputValues);
-}
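-
-// For strictly negative inputs sqrt() returns NaN, so the reciprocal is NaN as well;
-// -NAN in the expected values is the quiet NaN constant with its sign bit negated.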
-
-template<typename T, size_t NumDims>
-LayerTestResult<T, NumDims> SimpleReshapeTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    armnn::TensorInfo inputTensorInfo,
-    armnn::TensorInfo outputTensorInfo,
-    const std::vector<T>& inputData,
-    const std::vector<T>& outputExpectedData)
-{
-    auto input = MakeTensor<T, NumDims>(inputTensorInfo, inputData);
-
-    LayerTestResult<T, NumDims> ret(outputTensorInfo);
-    ret.outputExpected = MakeTensor<T, NumDims>(outputTensorInfo, outputExpectedData);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ReshapeQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReshape(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), input.origin());
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
-
-    return ret;
-}
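-
-// Reshape only reinterprets the tensor's shape; the element count must be preserved
-// and the data stays in source order. In the tests below, 2*2*3*3 == 2*2*9*1 == 36
-// elements, and 2*2*8*1*1 == 2*2*2*2*2 == 32 elements.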
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> SimpleReshapeTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-
-    unsigned int inputShape[] = { 2, 2, 3, 3 };
-    unsigned int outputShape[] = { 2, 2, 9, 1 };
-
-    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(1.0f);
-    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(1.0f);
-
-    auto input = ConvertToDataType<ArmnnType>(
-        {
-            0.0f, 1.0f, 2.0f,
-            3.0f, 4.0f, 5.0f,
-            6.0f, 7.0f, 8.0f,
-
-            9.0f, 10.0f, 11.0f,
-            12.0f, 13.0f, 14.0f,
-            15.0f, 16.0f, 17.0f,
-
-            18.0f, 19.0f, 20.0f,
-            21.0f, 22.0f, 23.0f,
-            24.0f, 25.0f, 26.0f,
-
-            27.0f, 28.0f, 29.0f,
-            30.0f, 31.0f, 32.0f,
-            33.0f, 34.0f, 35.0f,
-        },
-        inputTensorInfo);
-
-    auto outputExpected = ConvertToDataType<ArmnnType>(
-        {
-            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
-
-            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
-
-            18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f,
-
-            27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f,
-        },
-        outputTensorInfo);
-
-    return SimpleReshapeTestImpl<T, 4>(
-        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 5> Reshape5dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-
-    unsigned int inputShape[] = { 2, 2, 8, 1, 1 };
-    unsigned int outputShape[] = { 2, 2, 2, 2, 2 };
-
-    inputTensorInfo = armnn::TensorInfo(5, inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(1.0f);
-    outputTensorInfo = armnn::TensorInfo(5, outputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(1.0f);
-
-    auto input = ConvertToDataType<ArmnnType>(
-        {
-            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
-            8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
-
-            16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f,
-            24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
-        },
-        inputTensorInfo);
-
-    auto outputExpected = ConvertToDataType<ArmnnType>(
-        {
-            0.0f, 1.0f,
-            2.0f, 3.0f,
-
-            4.0f, 5.0f,
-            6.0f, 7.0f,
-
-
-            8.0f, 9.0f,
-            10.0f, 11.0f,
-
-            12.0f, 13.0f,
-            14.0f, 15.0f,
-
-
-
-            16.0f, 17.0f,
-            18.0f, 19.0f,
-
-            20.0f, 21.0f,
-            22.0f, 23.0f,
-
-
-            24.0f, 25.0f,
-            26.0f, 27.0f,
-
-            28.0f, 29.0f,
-            30.0f, 31.0f,
-        },
-        outputTensorInfo);
-
-    return SimpleReshapeTestImpl<T, 5>(
-        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SimpleFloorTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-
-    armnn::TensorInfo outputTensorInfo(inputTensorInfo);
-    outputTensorInfo.SetQuantizationScale(0.1f);
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(
-        { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f,
-        1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f },
-        inputTensorInfo));
-
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, ConvertToDataType<ArmnnType>(
-        { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f,
-        1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f },
-        outputTensorInfo));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::FloorQueueDescriptor data;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFloor(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
-
-    return ret;
-}
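-
-// Floor rounds towards negative infinity rather than truncating towards zero,
-// hence -1.3f maps to -2.0f (not -1.0f) in the expected output above.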
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeBilinearNopTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(1.5f);
-        inputTensorInfo.SetQuantizationOffset(-3);
-        outputTensorInfo.SetQuantizationScale(1.5f);
-        outputTensorInfo.SetQuantizationOffset(-3);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                     {
-                                             1, 2, 3, 4,
-                                             2, 3, 4, 5,
-                                             3, 4, 5, 6,
-                                             4, 5, 6, 7
-                                     }
-                                   : std::initializer_list<float>
-                                     {
-                                             1.0f, 2.0f, 3.0f, 4.0f,
-                                             2.0f, 3.0f, 4.0f, 5.0f,
-                                             3.0f, 4.0f, 5.0f, 6.0f,
-                                             4.0f, 5.0f, 6.0f, 7.0f,
-
-                                             1.0f, 2.0f, 3.0f, 4.0f,
-                                             2.0f, 3.0f, 4.0f, 5.0f,
-                                             3.0f, 4.0f, 5.0f, 6.0f,
-                                             4.0f, 5.0f, 6.0f, 7.0f
-                                     };
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
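-    // Input and output shapes are identical, so this bilinear resize is a no-op and
-    // the expected output is simply the input tensor.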
-    result.outputExpected = input;
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> SimpleResizeBilinearTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 1, 1, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 1, 1, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(0.1567f);
-        inputTensorInfo.SetQuantizationOffset(1);
-        outputTensorInfo.SetQuantizationScale(0.1567f);
-        outputTensorInfo.SetQuantizationOffset(1);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                     {
-                                             1, 255,
-                                             200, 250
-                                     }
-                                   : std::initializer_list<float>
-                                     {
-                                             1.0f, 255.0f,
-                                             200.0f, 250.0f,
-
-                                             250.0f, 200.0f,
-                                             250.0f,   1.0f
-                                     };
-
-    // The 'resize bilinear' operation projects the top-left corner of each output texel into the input image,
-    // then figures out the interpolants and weights. Note that this is different from projecting the centre of
-    // the output texel. Thus, for an input matrix of 2x2, we expect the output 1x1 matrix to contain, as its
-    // single element, the value that was at position (0,0) of the input matrix (rather than an average, which
-    // we would expect if projecting the centre).
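-    //
-    // Worked projection for the quantized 2x2 -> 1x1 case above: the scale factor is
-    // inputSize / outputSize = 2 / 1 = 2, so output texel (0,0) projects to input
-    // coordinate (0*2, 0*2) = (0,0), whose value 1.0f is taken unchanged; no
-    // interpolation weights contribute.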
-
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                      {
-                                              1
-                                      }
-                                    : std::initializer_list<float>
-                                      {
-                                              1.0f,
-
-                                              250.0f
-                                      };
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeBilinearSqMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(3.141592f);
-        inputTensorInfo.SetQuantizationOffset(3);
-        outputTensorInfo.SetQuantizationScale(3.141592f);
-        outputTensorInfo.SetQuantizationOffset(3);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                     {
-                                             1, 2, 3, 4,
-                                             2, 3, 4, 5,
-                                             3, 4, 5, 6,
-                                             4, 5, 6, 7
-                                     }
-                                   : std::initializer_list<float>
-                                     {
-                                             1.0f, 2.0f, 3.0f, 4.0f,
-                                             2.0f, 3.0f, 4.0f, 5.0f,
-                                             3.0f, 4.0f, 5.0f, 6.0f,
-                                             4.0f, 5.0f, 6.0f, 7.0f,
-
-                                             7.0f, 6.0f, 5.0f, 4.0f,
-                                             6.0f, 5.0f, 4.0f, 3.0f,
-                                             5.0f, 4.0f, 3.0f, 2.0f,
-                                             4.0f, 3.0f, 2.0f, 1.0f
-                                     };
-
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                      {
-                                              1, 3,
-                                              3, 5
-                                      }
-                                    : std::initializer_list<float>
-                                      {
-                                              1.0f, 3.0f,
-                                              3.0f, 5.0f,
-
-                                              7.0f, 5.0f,
-                                              5.0f, 3.0f
-                                      };
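-    // With a 4x4 -> 2x2 resize the scale factor is exactly 2 in both dimensions, so every output texel
-    // projects onto an integer input coordinate (2y, 2x) and no interpolation occurs: the expected
-    // output is simply { input(0,0), input(0,2), input(2,0), input(2,2) } = { 1, 3, 3, 5 }.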
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeBilinearMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 2, 3, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 1, 2, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 2, 3, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(1.5f);
-        inputTensorInfo.SetQuantizationOffset(-1);
-        outputTensorInfo.SetQuantizationScale(1.5f);
-        outputTensorInfo.SetQuantizationOffset(-1);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                     {
-                                             3.0f, 4.5f, 6.0f, // 1,  2,  3, : Expected quantised values
-                                             9.0f, 13.5f, 21.0f // 5,  8, 13
-                                     }
-                                   : std::initializer_list<float>
-                                     {
-                                             1.0f, 2.0f, 3.0f, 5.0f, 8.0f,
-                                             13.0f, 21.0f, 34.0f, 55.0f, 89.0f,
-                                             144.0f, 233.0f, 377.0f, 610.0f, 987.0f,
-
-                                             987.0f, 610.0f, 377.0f, 233.0f, 144.0f,
-                                             89.0f, 55.0f, 34.0f, 21.0f, 13.0f,
-                                             8.0f, 5.0f, 3.0f, 2.0f, 1.0f
-                                     };
-
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                      {
-                                              3.0f, 5.25f // 1, 3
-                                      }
-                                    : std::initializer_list<float>
-                                      {
-                                              1.0f,   2.6666f,   6.00f,
-                                              78.5f, 179.3333f, 401.00f,
-
-                                              987.0f, 454.6670f, 203.33f,
-                                              48.5f,  22.3333f,  10.00f
-                                      };
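-    // Worked example for the float case (a sketch, using src = dst * inputSize / outputSize with
-    // top-left projection): for output (0,1), srcX = 1 * 5/3 = 1.6667 and srcY = 0, so the value is
-    // lerp(input(0,1), input(0,2), 0.6667) = 2.0 + 0.6667 * (3.0 - 2.0) = 2.6666(7).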
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeBilinearMagTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 3, 2, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 3, 2, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 3, 5, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(0.010765f);
-        inputTensorInfo.SetQuantizationOffset(7);
-        outputTensorInfo.SetQuantizationScale(0.010132f);
-        outputTensorInfo.SetQuantizationOffset(-18);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                     {
-                                             0.183005f, 2.379065f, // 24, 228, : Expected quantised values
-                                             1.05497f, 1.302565f, // 105, 128,
-                                             2.400595f, 0.68896f // 230, 71
-                                     }
-                                   : std::initializer_list<float>
-                                     {
-                                             1.0f,   2.0f,
-                                             13.0f,  21.0f,
-                                             144.0f, 233.0f,
-
-                                             233.0f, 144.0f,
-                                             21.0f,  13.0f,
-                                             2.0f,   1.0f
-                                     };
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                      {
-                                              0.18300501f, 1.06142902f, 1.93985295f, 2.37906504f, 2.37906504f,
-                                              1.05497003f, 1.15400803f, 1.25304604f, 1.30256498f, 1.30256498f,
-                                              2.40059495f, 1.71594095f, 1.03128707f, 0.68896002f, 0.68896002f
-                                              // 0, 87, 173, 217, 217, : Expected quantised values
-                                              // 86, 96, 106, 111, 111,
-                                              // 219, 151, 84, 50, 50
-                                      }
-                                    : std::initializer_list<float>
-                                      {
-                                              1.0f,   1.4f,   1.8f,   2.0f,   2.0f,
-                                              13.0f,  16.2f,  19.4f,  21.0f,  21.0f,
-                                              144.0f, 179.6f, 215.2f, 233.0f, 233.0f,
-
-                                              233.0f, 197.4f, 161.8f, 144.0f, 144.0f,
-                                              21.0f,  17.8f,  14.6f,  13.0f,  13.0f,
-                                              2.0f,   1.6f,   1.2f,   1.0f,   1.0f
-                                      };
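-    // Worked example for the float case: upscaling width 2 -> 5 gives scaleX = 2/5 = 0.4, so for
-    // output (0,1), srcX = 0.4 and the value is input(0,0) + 0.4 * (input(0,1) - input(0,0))
-    // = 1.0 + 0.4 * (2.0 - 1.0) = 1.4.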
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeNearestNeighborNopTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(1.5f);
-        inputTensorInfo.SetQuantizationOffset(-3);
-        outputTensorInfo.SetQuantizationScale(1.5f);
-        outputTensorInfo.SetQuantizationOffset(-3);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                           {
-                                                   1, 2, 3, 4,
-                                                   2, 3, 4, 5,
-                                                   3, 4, 5, 6,
-                                                   4, 5, 6, 7
-                                           }
-                                   : std::initializer_list<float>
-                                           {
-                                                   1.0f, 2.0f, 3.0f, 4.0f,
-                                                   2.0f, 3.0f, 4.0f, 5.0f,
-                                                   3.0f, 4.0f, 5.0f, 6.0f,
-                                                   4.0f, 5.0f, 6.0f, 7.0f,
-
-                                                   1.0f, 2.0f, 3.0f, 4.0f,
-                                                   2.0f, 3.0f, 4.0f, 5.0f,
-                                                   3.0f, 4.0f, 5.0f, 6.0f,
-                                                   4.0f, 5.0f, 6.0f, 7.0f
-                                           };
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
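-    // Input and output shapes match, so the resize is a no-op and the output should equal the input.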
-    result.outputExpected = input;
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> SimpleResizeNearestNeighborTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 1, 1, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 1, 1, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(0.1567f);
-        inputTensorInfo.SetQuantizationOffset(1);
-        outputTensorInfo.SetQuantizationScale(0.1567f);
-        outputTensorInfo.SetQuantizationOffset(1);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                           {
-                                                   1, 255,
-                                                   200, 250
-                                           }
-                                   : std::initializer_list<float>
-                                           {
-                                                   1.0f, 255.0f,
-                                                   200.0f, 250.0f,
-
-                                                   250.0f, 200.0f,
-                                                   250.0f,   1.0f
-                                           };
-
-    // The 'resize' operation projects the top-left corner of output texels into the input image and,
-    // for nearest-neighbour, picks the closest input texel rather than interpolating. Note this is
-    // different to projecting the centre of the output texel. Thus, for an input matrix of 2x2, we'll
-    // expect the output 1x1 matrix to contain, as its single element, the value that was at position
-    // (0,0) of the input matrix (rather than a value nearer the centre, which we would expect if
-    // projecting the centre).
-
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                            {
-                                                    1
-                                            }
-                                    : std::initializer_list<float>
-                                            {
-                                                    1.0f,
-
-                                                    250.0f
-                                            };
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeNearestNeighborSqMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(3.141592f);
-        inputTensorInfo.SetQuantizationOffset(3);
-        outputTensorInfo.SetQuantizationScale(3.141592f);
-        outputTensorInfo.SetQuantizationOffset(3);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                           {
-                                                  1, 2, 3, 4,
-                                                  2, 3, 4, 5,
-                                                  3, 4, 5, 6,
-                                                  4, 5, 6, 7
-                                           }
-                                   : std::initializer_list<float>
-                                           {
-                                                   1.0f, 2.0f, 3.0f, 4.0f,
-                                                   2.0f, 3.0f, 4.0f, 5.0f,
-                                                   3.0f, 4.0f, 5.0f, 6.0f,
-                                                   4.0f, 5.0f, 6.0f, 7.0f,
-
-                                                   7.0f, 6.0f, 5.0f, 4.0f,
-                                                   6.0f, 5.0f, 4.0f, 3.0f,
-                                                   5.0f, 4.0f, 3.0f, 2.0f,
-                                                   4.0f, 3.0f, 2.0f, 1.0f
-                                           };
-
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                            {
-                                                    1, 3,
-                                                    3, 5
-                                            }
-                                    : std::initializer_list<float>
-                                            {
-                                                    1.0f, 3.0f,
-                                                    3.0f, 5.0f,
-
-                                                    7.0f, 5.0f,
-                                                    5.0f, 3.0f
-                                            };
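-    // With the scale factor exactly 2 in both dimensions, nearest-neighbour picks input(2y, 2x),
-    // giving the same { 1, 3, 3, 5 } values as the bilinear square-min test above.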
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeNearestNeighborMinTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 2, 3, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 1, 2, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 2, 3, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(1.5f);
-        inputTensorInfo.SetQuantizationOffset(-1);
-        outputTensorInfo.SetQuantizationScale(1.5f);
-        outputTensorInfo.SetQuantizationOffset(-1);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                           {
-                                                   3.0f, 4.5f, 6.0f, // 1,  2,  3, : Expected quantised values
-                                                   9.0f, 13.5f, 21.0f // 5,  8, 13
-                                           }
-                                   : std::initializer_list<float>
-                                           {
-                                                   1.0f, 2.0f, 3.0f, 5.0f, 8.0f,
-                                                   13.0f, 21.0f, 34.0f, 55.0f, 89.0f,
-                                                   144.0f, 233.0f, 377.0f, 610.0f, 987.0f,
-
-                                                   987.0f, 610.0f, 377.0f, 233.0f, 144.0f,
-                                                   89.0f, 55.0f, 34.0f, 21.0f, 13.0f,
-                                                   8.0f, 5.0f, 3.0f, 2.0f, 1.0f
-                                           };
-
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                            {
-                                                    3.0f, 4.5f // 1, 2
-                                            }
-                                    : std::initializer_list<float>
-                                            {
-                                                    1.f,   2.f,   5.f,
-                                                   13.f,  21.f,  55.f,
-
-                                                  987.f, 610.f, 233.f,
-                                                   89.f,  55.f,  21.f
-                                            };
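-    // Worked example for the float case: downscaling 3x5 -> 2x3 with nearest-neighbour takes
-    // src = floor(dst * inputSize / outputSize), so srcY = { 0, 1 } and srcX = { 0, 1, 3 }; row 0 of
-    // the output is therefore { input(0,0), input(0,1), input(0,3) } = { 1, 2, 5 }.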
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> ResizeNearestNeighborMagTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout dataLayout,
-        float inQuantScale,
-        int32_t inQuantOffset,
-        float outQuantScale,
-        int32_t outQuantOffset)
-{
-    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
-                                        ?  armnnUtils::GetTensorInfo(1, 1, 3, 2, dataLayout, ArmnnType)
-                                        :  armnnUtils::GetTensorInfo(1, 2, 3, 2, dataLayout, ArmnnType);
-    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
-                                         ?  armnnUtils::GetTensorInfo(1, 1, 3, 5, dataLayout, ArmnnType)
-                                         :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(inQuantScale);
-        inputTensorInfo.SetQuantizationOffset(inQuantOffset);
-        outputTensorInfo.SetQuantizationScale(outQuantScale);
-        outputTensorInfo.SetQuantizationOffset(outQuantOffset);
-    }
-
-    std::vector<float> inputData = armnn::IsQuantizedType<T>()
-                                   ? std::initializer_list<float>
-                                        {
-                                            0.183005f, 2.379065f, //  24, 228, : expected quantised values
-                                            1.054970f, 1.302565f, // 105, 128,
-                                            2.400595f, 0.688960f  // 230, 71
-                                        }
-                                   : std::initializer_list<float>
-                                        {
-                                               1.0f,   2.0f,
-                                              13.0f,  21.0f,
-                                            144.0f, 233.0f,
-
-                                            233.0f, 144.0f,
-                                             21.0f,  13.0f,
-                                              2.0f,   1.0f
-                                        };
-    std::vector<float> outputData = armnn::IsQuantizedType<T>()
-                                    ? std::initializer_list<float>
-                                        {
-                                            0.183005f, 0.183005f, 0.183005f, 2.379065f, 2.379065f,
-                                            1.054970f, 1.054970f, 1.054970f, 1.302565f, 1.302565f,
-                                            2.400595f, 2.400595f, 2.400595f, 0.688960f, 0.688960f
-                                        }
-                                    : std::initializer_list<float>
-                                        {
-                                              1.f,   1.f,   1.f,   2.f,   2.f,
-                                             13.f,  13.f,  13.f,  21.f,  21.f,
-                                            144.f, 144.f, 144.f, 233.f, 233.f,
-
-                                            233.f, 233.f, 233.f, 144.f, 144.f,
-                                             21.f,  21.f,  21.f,  13.f,  13.f,
-                                              2.f,   2.f,   2.f,   1.f,   1.f
-                                        };
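-    // Worked example for the float case: upscaling width 2 -> 5 gives srcX = floor(dstX * 2/5)
-    // = { 0, 0, 0, 1, 1 }, so each input column is simply repeated, e.g. row 0 becomes
-    // { 1, 1, 1, 2, 2 }.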
-
-    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
-    if (dataLayout == armnn::DataLayout::NHWC)
-    {
-        std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
-        inputData = tmp;
-
-        std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
-        outputData = tmp1;
-    }
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputData));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::ResizeQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_DataLayout = dataLayout;
-    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T, std::size_t InputDim, std::size_t OutputDim>
-LayerTestResult<T, OutputDim> MeanTestHelper(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const unsigned int* inputShape,
-        const std::vector<float>& inputData,
-        const std::vector<unsigned int>& axis,
-        bool keepDims,
-        const unsigned int* outputShape,
-        const std::vector<float>& outputData,
-        float scale = 1.0f,
-        int32_t offset = 0)
-{
-    armnn::TensorInfo inputTensorInfo(InputDim, inputShape, ArmnnType);
-    armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, ArmnnType);
-
-    inputTensorInfo.SetQuantizationScale(scale);
-    inputTensorInfo.SetQuantizationOffset(offset);
-
-    outputTensorInfo.SetQuantizationScale(scale);
-    outputTensorInfo.SetQuantizationOffset(offset);
-
-    auto input = MakeTensor<T, InputDim>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
-
-    LayerTestResult<T, OutputDim> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, OutputDim>(
-            outputTensorInfo, ConvertToDataType<ArmnnType>(outputData, outputTensorInfo));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::MeanQueueDescriptor data;
-    data.m_Parameters.m_Axis = axis;
-    data.m_Parameters.m_KeepDims = keepDims;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data,  info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMean(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), input.origin());
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 1> MeanSimpleTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 3, 2 };
-    const unsigned int outputShape[] = { 1 };
-
-    std::vector<float> input({ 1.5f, 1.5f, 2.5f, 2.5f, 3.5f, 3.5f });
-    std::vector<float> output({ 2.5f });
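-    // An empty axis list means the reduction runs over every dimension, so the single output value is
-    // the mean of all six inputs: (1.5 + 1.5 + 2.5 + 2.5 + 3.5 + 3.5) / 6 = 2.5.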
-
-    return MeanTestHelper<ArmnnType, T, 2, 1>(
-            workloadFactory, memoryManager, inputShape, input, {}, false, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 3> MeanSimpleAxisTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 2, 3, 1, 2 };
-    const unsigned int outputShape[] = { 3, 1, 2 };
-
-    std::vector<float> input({ 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f });
-    std::vector<float> output({ 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f });
-
-    return MeanTestHelper<ArmnnType, T, 4, 3>(
-            workloadFactory, memoryManager, inputShape, input, { 0 }, false, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> MeanKeepDimsTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 1, 1, 3, 2 };
-    const unsigned int outputShape[] = { 1, 1, 1, 2 };
-
-    std::vector<float> input({ 1.5f, 1.5f, 2.5f, 2.5f, 3.5f, 3.5f });
-    std::vector<float> output({ 2.5f, 2.5f });
-
-    return MeanTestHelper<ArmnnType, T, 4, 4>(
-            workloadFactory, memoryManager, inputShape, input, { 2 }, true, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 4> MeanMultipleDimsTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 2, 3, 1, 2 };
-    const unsigned int outputShape[] = { 1, 3, 1, 1 };
-
-    std::vector<float> input({ 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f });
-    std::vector<float> output({ 2.0f, 4.0f, 6.0f });
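-    // Reducing over axes { 0, 3 } of the { 2, 3, 1, 2 } input averages four values per remaining
-    // channel, e.g. output[0] = (1.5 + 2.5 + 1.5 + 2.5) / 4 = 2.0.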
-
-    return MeanTestHelper<ArmnnType, T, 4, 4>(
-            workloadFactory, memoryManager, inputShape, input, { 0, 3 }, true, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 1> MeanVts1Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 4, 3, 2 };
-    const unsigned int outputShape[] = { 2 };
-
-    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
-                               15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
-    std::vector<float> output({ 12.0f, 13.0f });
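-    // Axes { 0, 1 } collapse the 4x3 leading dimensions, so each output element averages twelve
-    // inputs: output[0] = (1 + 3 + 5 + ... + 23) / 12 = 12.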
-
-    return MeanTestHelper<ArmnnType, T, 3, 1>(
-            workloadFactory, memoryManager, inputShape, input, { 0, 1 }, false, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 3> MeanVts2Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 4, 3, 2 };
-    const unsigned int outputShape[] = { 1, 3, 1 };
-
-    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
-                               15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
-    std::vector<float> output({ 10.5f, 12.5f, 14.5f });
-
-    return MeanTestHelper<ArmnnType, T, 3, 3>(
-            workloadFactory, memoryManager, inputShape, input, { 0, 2 }, true, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 3> MeanVts3Test(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = { 1, 2, 2, 1 };
-    const unsigned int outputShape[] = { 1, 2, 1 };
-
-    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f });
-    std::vector<float> output({ 1.5f, 3.5f });
-
-    return MeanTestHelper<ArmnnType, T, 4, 3>(
-            workloadFactory, memoryManager, inputShape, input, { 2 }, false, outputShape, output);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> ConcatDifferentInputOutputQParamTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool useSubtensor)
-{
-    // Defines the tensor descriptors.
-    armnn::TensorInfo outputTensorInfo({ 3, 6, 3 }, ArmnnType);
-    armnn::TensorInfo inputTensorInfo1({ 3, 6, 2 }, ArmnnType);
-    armnn::TensorInfo inputTensorInfo2({ 3, 6, 1 }, ArmnnType);
-
-    std::vector<armnn::TensorShape> inputTensorShapes({inputTensorInfo1.GetShape(), inputTensorInfo2.GetShape()});
-
-    // Quantized input1 tensor.
-    const float inputScale1 = 0.5f;
-    const int32_t inputOffset1 = 5;
-
-    auto input1 = MakeTensor<T, 3>(inputTensorInfo1, std::vector<T>(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9,
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27,
-        28, 29, 30,
-        31, 32, 33,
-        34, 35, 36
-    }));
-
-    // Quantized input2 tensor.
-    const float inputScale2 = 0.2f;
-    const int32_t inputOffset2 = 10;
-
-    auto input2 = MakeTensor<T, 3>(inputTensorInfo2, std::vector<T>(
-    {
-        37, 38, 39,
-        40, 41, 42,
-        43, 44, 45,
-        46, 47, 48,
-        49, 50, 51,
-        52, 53, 54
-    }));
-
-    // Quantized output tensor.
-    const float outputScale = 0.1f;
-    const int32_t outputOffset = 20;
-
-    LayerTestResult<T, 3> ret(outputTensorInfo);
-
-    ret.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(
-    {
-        0,   5,  74,
-        10,  15,  76,
-        20,  25,  78,
-        30,  35,  80,
-        40,  45,  82,
-        50,  55,  84,
-
-        60,  65,  86,
-        70,  75,  88,
-        80,  85,  90,
-        90,  95,  92,
-        100, 105,  94,
-        110, 115,  96,
-
-        120, 125,  98,
-        130, 135, 100,
-        140, 145, 102,
-        150, 155, 104,
-        160, 165, 106,
-        170, 175, 108
-    }));
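-    // The expected values above come from requantising each input into the output's quantisation
-    // space: dequantise with (q - offset) * scale, then requantise with q = value / scale + offset.
-    // E.g. input1's first element: (1 - 5) * 0.5 = -2.0, and -2.0 / 0.1 + 20 = 0; input2's first
-    // element: (37 - 10) * 0.2 = 5.4, and 5.4 / 0.1 + 20 = 74.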
-
-    outputTensorInfo.SetQuantizationScale(outputScale);
-    outputTensorInfo.SetQuantizationOffset(outputOffset);
-    inputTensorInfo1.SetQuantizationScale(inputScale1);
-    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
-    inputTensorInfo2.SetQuantizationScale(inputScale2);
-    inputTensorInfo2.SetQuantizationOffset(inputOffset2);
-
-    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
-    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
-
-    std::vector<unsigned int> wOrigin2 = { 0, 0, 2 }; // Extent of the window is defined by size of input[1].
-    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    bool subTensorsSupported = useSubtensor && workloadFactory.SupportsSubTensors();
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
-            subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo1);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
-            subTensorsSupported ?
-            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
-            workloadFactory.CreateTensorHandle(inputTensorInfo2);
-
-    armnn::ConcatQueueDescriptor data;
-    armnn::OriginsDescriptor desc = armnn::CreateDescriptorForConcatenation(
-            inputTensorShapes.begin(), inputTensorShapes.end(), 2);
-    data.m_Parameters = desc;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    data.m_ViewOrigins.push_back(window1);
-    data.m_ViewOrigins.push_back(window2);
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
-
-    inputHandle1->Allocate();
-    inputHandle2->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
-
-    return ret;
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> PreluTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 1, 2, 2, 3 }, ArmnnType);
-    armnn::TensorInfo alphaTensorInfo ({ 1, 1, 1, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 2, 2, 3 }, ArmnnType);
-
-    if (armnn::IsQuantizedType<T>())
-    {
-        inputTensorInfo.SetQuantizationScale(0.25f);
-        inputTensorInfo.SetQuantizationOffset(128);
-        alphaTensorInfo.SetQuantizationScale(0.25f);
-        alphaTensorInfo.SetQuantizationOffset(50);
-        outputTensorInfo.SetQuantizationScale(0.5f);
-        outputTensorInfo.SetQuantizationOffset(120);
-    }
-
-    std::vector<float> inputData
-    {
-        // Expected quantized values:
-        // 128, 128, 128, 132, 132, 132, 124, 124, 124, 120, 120, 120
-        0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -2.0f, -2.0f, -2.0f
-    };
-    std::vector<float> alphaData
-    {
-        // Expected quantized values:
-        // 50, 54, 58
-        0.0f, 1.0f, 2.0f
-    };
-    std::vector<float> outputExpectedData =
-    {
-        // Expected quantized values:
-        // 120, 120, 120, 122, 122, 122, 120, 118, 116, 120, 116, 112
-       0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, -1.0f, -2.0f, 0.0f, -2.0f, -4.0f
-    };
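-    // PReLU computes f(x) = x when x >= 0 and f(x) = alpha * x otherwise, with alpha broadcast along
-    // the channel dimension; e.g. x = -1 with alpha = 2 gives -2.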
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
-                                                                      inputTensorInfo.GetQuantizationOffset(),
-                                                                      inputData));
-    auto alpha = MakeTensor<T, 4>(alphaTensorInfo, QuantizedVector<T>(alphaTensorInfo.GetQuantizationScale(),
-                                                                      alphaTensorInfo.GetQuantizationOffset(),
-                                                                      alphaData));
-
-    LayerTestResult<T, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
-                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
-                                                                outputTensorInfo.GetQuantizationOffset(),
-                                                                outputExpectedData));
-
-    std::unique_ptr <armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr <armnn::ITensorHandle> alphaHandle  = workloadFactory.CreateTensorHandle(alphaTensorInfo);
-    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::PreluQueueDescriptor descriptor;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload (descriptor, info, inputTensorInfo,  inputHandle.get());
-    AddInputToWorkload (descriptor, info, alphaTensorInfo,  alphaHandle.get());
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePrelu(descriptor, info);
-
-    inputHandle->Allocate();
-    alphaHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
-    CopyDataToITensorHandle(alphaHandle.get(), &alpha[0][0][0][0]);
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-
-    return result;
-}
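
For reference, PReLU computes f(x) = max(0, x) + alpha * min(0, x) elementwise, broadcasting alpha across the batch and spatial dimensions. A minimal float sketch of that reference, from which the expected values above follow (the helper name is illustrative, not part of the test suite):

    // Illustrative PReLU reference over flattened NHWC data with per-channel alpha.
    #include <algorithm>
    #include <cstddef>
    #include <vector>

    std::vector<float> PreluReference(const std::vector<float>& input,
                                      const std::vector<float>& alpha) // one alpha per channel
    {
        std::vector<float> output(input.size());
        const std::size_t channels = alpha.size();
        for (std::size_t i = 0; i < input.size(); ++i)
        {
            const float x = input[i];
            const float a = alpha[i % channels]; // broadcast alpha along the channel axis
            output[i] = std::max(0.0f, x) + a * std::min(0.0f, x);
        }
        return output;
    }

With the input and alpha data above this yields { 0, 0, 0, 1, 1, 1, 0, -1, -2, 0, -2, -4 }, matching outputExpectedData.
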
-
-template<armnn::DataType ArmnnType,
-        std::size_t InputDim,
-        std::size_t OutputDim,
-        typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, OutputDim> BatchToSpaceNdHelper(
-        armnn::IWorkloadFactory &workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::DataLayout& dataLayout,
-        const unsigned int *inputShape,
-        const std::vector<float> &inputData,
-        const std::vector<unsigned int> &blockShape,
-        const std::vector<std::pair<unsigned int, unsigned int>> &crops,
-        const unsigned int *outputShape,
-        const std::vector<float> &outputData,
-        float scale = 1.0f,
-        int32_t offset = 0)
-{
-    armnn::TensorInfo inputTensorInfo(InputDim, inputShape, ArmnnType);
-    armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, ArmnnType);
-
-    inputTensorInfo.SetQuantizationScale(scale);
-    inputTensorInfo.SetQuantizationOffset(offset);
-
-    outputTensorInfo.SetQuantizationScale(scale);
-    outputTensorInfo.SetQuantizationOffset(offset);
-
-    auto input = MakeTensor<T, InputDim>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
-
-    LayerTestResult<T, OutputDim> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, OutputDim>(outputTensorInfo,
-                                                     ConvertToDataType<ArmnnType>(outputData, outputTensorInfo));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::BatchToSpaceNdQueueDescriptor data;
-    data.m_Parameters.m_DataLayout = dataLayout;
-    data.m_Parameters.m_BlockShape = blockShape;
-    data.m_Parameters.m_Crops = crops;
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchToSpaceNd(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), input.origin());
-
-    workload->PostAllocationConfigure();
-    workload->Execute();
-
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
-
-    return result;
-}
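
The helper leaves shape validation to the backend; for orientation, BatchToSpaceNd moves block-sized groups of batch elements back into the spatial dimensions, then trims the crops. A sketch of the 4D NHWC shape arithmetic (function name is illustrative):

    // Illustrative BatchToSpaceNd output shape for a 4D NHWC tensor.
    #include <array>
    #include <utility>
    #include <vector>

    std::array<unsigned int, 4> BatchToSpaceOutputShapeNhwc(
        const std::array<unsigned int, 4>& in,                        // { N, H, W, C }
        const std::vector<unsigned int>& blockShape,                  // { blockH, blockW }
        const std::vector<std::pair<unsigned int, unsigned int>>& crops)
    {
        return
        {
            in[0] / (blockShape[0] * blockShape[1]),
            in[1] * blockShape[0] - crops[0].first - crops[0].second,
            in[2] * blockShape[1] - crops[1].first - crops[1].second,
            in[3]
        };
    }

For example, BatchToSpaceNdNhwcTest4 below maps { 8, 1, 3, 1 } with block shape { 2, 2 } and crops { {0, 0}, {2, 0} } to { 2, 2, 4, 1 }.
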
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest1(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 2, 2, 1};
-    const unsigned int outputShape[] = {1, 4, 4, 1};
-
-    std::vector<float> input({
-                                     // Batch 0, Height 0, Width (2) x Channel (1)
-                                     1.0f, 3.0f,
-                                     // Batch 0, Height 1, Width (2) x Channel (1)
-                                     9.0f, 11.0f,
-
-
-                                     // Batch 1, Height 0, Width (2) x Channel (1)
-                                     2.0f, 4.0f,
-                                     // Batch 1, Height 1, Width (2) x Channel (1)
-                                     10.0f, 12.0f,
-
-
-                                     // Batch 2, Height 0, Width (2) x Channel (1)
-                                     5.0f, 7.0f,
-                                     // Batch 2, Height 1, Width (2) x Channel (1)
-                                     13.0f, 15.0f,
-
-                                     // Batch 3, Height 0, Width (2) x Channel (1)
-                                     6.0f, 8.0f,
-                                     // Batch 3, Height 1, Width (2) x Channel (1)
-                                     14.0f, 16.0f
-                             });
-
-    std::vector<float> expectedOutput({
-                                              1.0f,   2.0f,  3.0f,  4.0f,
-                                              5.0f,   6.0f,  7.0f,  8.0f,
-                                              9.0f,  10.0f, 11.0f,  12.0f,
-                                              13.0f, 14.0f, 15.0f,  16.0f
-                                      });
-
-    std::vector<unsigned int> blockShape {2, 2};
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NHWC, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest2(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 1, 1, 1};
-    const unsigned int outputShape[] = {1, 2, 2, 1};
-
-    std::vector<float> input({
-                                     // Batches 0 to 3, Height (1) x Width (1) x Channel (1)
-                                     1.0f, 2.0f, 3.0f, 4.0f
-                             });
-
-    std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NHWC, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest3(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 1, 1, 3};
-    const unsigned int outputShape[] = {1, 2, 2, 3};
-
-    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f});
-
-    std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NHWC, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest4(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {8, 1, 3, 1};
-    const unsigned int outputShape[] = {2, 2, 4, 1};
-
-    std::vector<float> input({
-                                     0.0f, 1.0f, 3.0f,
-                                     0.0f, 9.0f, 11.0f,
-                                     0.0f, 2.0f, 4.0f,
-                                     0.0f, 10.0f, 12.0f,
-                                     0.0f, 5.0f, 7.0f,
-                                     0.0f, 13.0f, 15.0f,
-                                     0.0f, 6.0f, 8.0f,
-                                     0.0f, 14.0f, 16.0f
-                             });
-
-    std::vector<float> expectedOutput({
-                                              1.0f, 2.0f, 3.0f, 4.0f,
-                                              5.0f, 6.0f, 7.0f, 8.0f,
-                                              9.0f, 10.0f, 11.0f, 12.0f,
-                                              13.0f, 14.0f, 15.0f, 16.0f
-                                      });
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {2, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NHWC, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest5(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 2, 2, 1};
-    const unsigned int outputShape[] = {1, 4, 4, 1};
-
-    std::vector<float> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
-    std::vector<float> expectedOutput({1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager, armnn::DataLayout::NHWC, inputShape,
-                                                 input, blockShape, crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest6(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 1, 1, 1};
-    const unsigned int outputShape[] = {1, 2, 2, 1};
-
-    std::vector<float> input({
-                                     // Batches 0 to 3, Height (1) x Width (1) x Channel (1)
-                                     1, 2, 3, 4
-                             });
-
-    std::vector<float> expectedOutput({1, 2, 3, 4});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
-                                                 crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNhwcTest7(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 1, 1, 3};
-    const unsigned int outputShape[] = {1, 2, 2, 3};
-
-    std::vector<float> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
-
-    std::vector<float> expectedOutput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
-                                                 crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest1(
-        armnn::IWorkloadFactory &workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 3, 1, 1};
-    const unsigned int outputShape[] = {1, 3, 2, 2};
-
-    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f});
-
-    std::vector<float> expectedOutput({
-                                              // Batch 0, Channel 0, Height (2) x Width (2)
-                                              1.0f,  4.0f,
-                                              7.0f, 10.0f,
-
-                                              // Batch 0, Channel 1, Height (2) x Width (2)
-                                              2.0f,  5.0f,
-                                              8.0f, 11.0f,
-
-                                              // Batch 0, Channel 2, Height (2) x Width (2)
-                                              3.0f,  6.0f,
-                                              9.0f, 12.0f,
-                                      });
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest2(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 1, 1, 1};
-    const unsigned int outputShape[] = {1, 1, 2, 2};
-
-    std::vector<float> input({
-                                     // Batches 0 to 3, Height (1) x Width (1) x Channel (1)
-                                     1.0f, 2.0f, 3.0f, 4.0f
-                             });
-
-    std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest3(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 3, 1, 1};
-    const unsigned int outputShape[] = {1, 3, 2, 2};
-
-    std::vector<float> input({1.0f, 3.0f, 5.0f, 7.0f, 9.0f, 11.0f, 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f});
-
-    std::vector<float> expectedOutput({
-                                              // Batch 0, Channel 0, Height (2) x Width (2)
-                                              1.0f,  7.0f,
-                                              2.0f,  8.0f,
-
-                                              // Batch 0, Channel 1, Height (2) x Width (2)
-                                              3.0f,  9.0f,
-                                              4.0f, 10.0f,
-
-                                              // Batch 0, Channel 2, Height (2) x Width (2)
-                                              5.0f, 11.0f,
-                                              6.0f, 12.0f,
-                                      });
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                                armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                                crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest4(
-        armnn::IWorkloadFactory &workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 3, 1, 1};
-    const unsigned int outputShape[] = {1, 3, 2, 2};
-
-    std::vector<float> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
-
-    std::vector<float> expectedOutput({
-                                              // Batch 0, Channel 0, Height (2) x Width (2)
-                                              1,  4,
-                                              7, 10,
-
-                                              // Batch 0, Channel 1, Height (2) x Width (2)
-                                              2,  5,
-                                              8, 11,
-
-                                              // Batch 0, Channel 2, Height (2) x Width (2)
-                                              3,  6,
-                                              9, 12,
-                                      });
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                 crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest5(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 1, 1, 1};
-    const unsigned int outputShape[] = {1, 1, 2, 2};
-
-    std::vector<float> input({
-                                     // Batches 0 to 3, Height (1) x Width (1) x Channel (1)
-                                     1, 2, 3, 4
-                             });
-
-    std::vector<float> expectedOutput({1, 2, 3, 4});
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                 crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest6(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {4, 3, 1, 1};
-    const unsigned int outputShape[] = {1, 3, 2, 2};
-
-    std::vector<float> input({1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12});
-
-    std::vector<float> expectedOutput({
-                                              // Batch 0, Channel 0, Height (2) x Width (2)
-                                              1,  7,
-                                              2,  8,
-
-                                              // Batch 0, Channel 1, Height (2) x Width (2)
-                                              3,  9,
-                                              4, 10,
-
-                                              // Batch 0, Channel 2, Height (2) x Width (2)
-                                              5, 11,
-                                              6, 12,
-                                      });
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                 crops, outputShape, expectedOutput);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> BatchToSpaceNdNchwTest7(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputShape[] = {8, 1, 1, 3};
-    const unsigned int outputShape[] = {2, 1, 2, 4};
-
-    std::vector<float> input({
-                                     0, 1, 3, 0,  9, 11,
-                                     0, 2, 4, 0, 10, 12,
-                                     0, 5, 7, 0, 13, 15,
-                                     0, 6, 8, 0, 14, 16
-                             });
-
-    std::vector<float> expectedOutput({
-                                              1,  2,  3,  4,
-                                              5,  6,  7,  8,
-                                              9, 10, 11, 12,
-                                              13, 14, 15, 16
-                                      });
-
-    std::vector<unsigned int> blockShape({2, 2});
-    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {2, 0}};
-
-    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
-                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
-                                                 crops, outputShape, expectedOutput);
-}
-
-template LayerTestResult<typename armnn::ResolveType<armnn::DataType::Float32>, 4>
-PreluTest<armnn::DataType::Float32>(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
-PreluTest<armnn::DataType::QuantisedAsymm8>(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
-PreluTest<armnn::DataType::QuantisedSymm16>(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-template<armnn::DataType ArmnnType, typename T, std::size_t outputDimLength>
-LayerTestResult<T, outputDimLength> StackTestHelper(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::TensorInfo& inputTensorInfo,
-        const armnn::TensorInfo& outputTensorInfo,
-        unsigned int axis,
-        const std::vector<std::vector<T>>& inputData,
-        const std::vector<T>& outputExpectedData)
-{
-    unsigned int numInputs = static_cast<unsigned int>(inputData.size());
-    std::vector<boost::multi_array<T, outputDimLength-1>> inputs;
-    for (unsigned int i = 0; i < numInputs; ++i)
-    {
-        inputs.push_back(MakeTensor<T, outputDimLength-1>(inputTensorInfo, inputData[i]));
-    }
-
-    LayerTestResult<T, outputDimLength> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<T, outputDimLength>(outputTensorInfo, outputExpectedData);
-
-    std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
-    for (unsigned int i = 0; i < numInputs; ++i)
-    {
-        inputHandles.push_back(workloadFactory.CreateTensorHandle(inputTensorInfo));
-    }
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::StackQueueDescriptor descriptor;
-    descriptor.m_Parameters.m_Axis = axis;
-    descriptor.m_Parameters.m_InputShape = inputTensorInfo.GetShape();
-    descriptor.m_Parameters.m_NumInputs = numInputs;
-
-    armnn::WorkloadInfo info;
-    for (unsigned int i = 0; i < numInputs; ++i)
-    {
-        std::unique_ptr<armnn::ITensorHandle>& inputHandle = inputHandles[i];
-        AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
-        inputHandle->Allocate();
-        CopyDataToITensorHandle(inputHandle.get(), inputs[i].origin());
-    }
-
-    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
-    outputHandle->Allocate();
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateStack(descriptor, info);
-
-    workload->Execute();
-
-    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
-
-    return result;
-}
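
StackTestHelper joins N equally shaped tensors along a brand-new axis, so the output rank is the input rank plus one, with a dimension of size numInputs inserted at position axis. A small sketch of that rule (the function is illustrative, not part of the file):

    // Illustrative: derive the stacked output shape from the input shape.
    #include <vector>

    std::vector<unsigned int> StackedShape(std::vector<unsigned int> inputShape,
                                           unsigned int axis,
                                           unsigned int numInputs)
    {
        inputShape.insert(inputShape.begin() + axis, numInputs);
        return inputShape;
    }

The tests below follow this pattern: two { 3, 2, 3 } inputs stacked on axis 0 give { 2, 3, 2, 3 }, on axis 1 give { 3, 2, 2, 3 }, and so on.
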
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Stack0AxisTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 2, 3, 2, 3 }, ArmnnType);
-
-    std::vector<std::vector<T>> inputData;
-
-    inputData.push_back(
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18
-    });
-
-    inputData.push_back(
-    {
-        19, 20, 21,
-        22, 23, 24,
-
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36
-    });
-
-    std::vector<T> outputExpectedData =
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18,
-
-
-        19, 20, 21,
-        22, 23, 24,
-
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36
-    };
-
-    return StackTestHelper<ArmnnType, T, 4>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo,
-        outputTensorInfo,
-        0U,
-        inputData,
-        outputExpectedData
-    );
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Stack4dOutput1AxisTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 3, 2, 2, 3 }, ArmnnType);
-
-    std::vector<std::vector<T>> inputData;
-
-    inputData.push_back(
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18
-    });
-
-    inputData.push_back(
-    {
-        19, 20, 21,
-        22, 23, 24,
-
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36
-    });
-
-    std::vector<T> outputExpectedData =
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        19, 20, 21,
-        22, 23, 24,
-
-
-        7, 8, 9,
-        10, 11, 12,
-
-        25, 26, 27,
-        28, 29, 30,
-
-
-        13, 14, 15,
-        16, 17, 18,
-
-        31, 32, 33,
-        34, 35, 36
-    };
-
-    return StackTestHelper<ArmnnType, T, 4>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo,
-        outputTensorInfo,
-        1U,
-        inputData,
-        outputExpectedData
-    );
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Stack4dOutput2AxisTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 3, 2, 2, 3 }, ArmnnType);
-
-    std::vector<std::vector<T>> inputData;
-
-    inputData.push_back(
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18
-    });
-
-    inputData.push_back(
-    {
-        19, 20, 21,
-        22, 23, 24,
-
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36
-    });
-
-    std::vector<T> outputExpectedData =
-    {
-        1, 2, 3,
-        19, 20, 21,
-
-        4, 5, 6,
-        22, 23, 24,
-
-
-        7, 8, 9,
-        25, 26, 27,
-
-        10, 11, 12,
-        28, 29, 30,
-
-        13, 14, 15,
-        31, 32, 33,
-
-        16, 17, 18,
-        34, 35, 36
-    };
-
-    return StackTestHelper<ArmnnType, T, 4>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo,
-        outputTensorInfo,
-        2U,
-        inputData,
-        outputExpectedData
-    );
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Stack4dOutput3AxisTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 3, 2, 3, 2 }, ArmnnType);
-
-    std::vector<std::vector<T>> inputData;
-
-    inputData.push_back(
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-        13, 14, 15,
-        16, 17, 18
-    });
-
-    inputData.push_back(
-    {
-        19, 20, 21,
-        22, 23, 24,
-
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36
-    });
-
-    std::vector<T> outputExpectedData =
-    {
-        1, 19,
-        2, 20,
-        3, 21,
-
-        4, 22,
-        5, 23,
-        6, 24,
-
-
-        7, 25,
-        8, 26,
-        9, 27,
-
-        10, 28,
-        11, 29,
-        12, 30,
-
-
-        13, 31,
-        14, 32,
-        15, 33,
-
-        16, 34,
-        17, 35,
-        18, 36
-    };
-
-    return StackTestHelper<ArmnnType, T, 4>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo,
-        outputTensorInfo,
-        3U,
-        inputData,
-        outputExpectedData
-    );
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Stack3dOutput1Axis3InputTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 3, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 3, 3, 3 }, ArmnnType);
-
-    std::vector<std::vector<T>> inputData;
-
-    inputData.push_back(
-    {
-        1, 2, 3,
-        4, 5, 6,
-        7, 8, 9
-    });
-
-    inputData.push_back(
-    {
-        10, 11, 12,
-        13, 14, 15,
-        16, 17, 18
-    });
-
-    inputData.push_back(
-    {
-        19, 20, 21,
-        22, 23, 24,
-        25, 26, 27
-    });
-
-    std::vector<T> outputExpectedData =
-    {
-        1, 2, 3,
-        10, 11, 12,
-        19, 20, 21,
-
-        4, 5, 6,
-        13, 14, 15,
-        22, 23, 24,
-
-        7, 8, 9,
-        16, 17, 18,
-        25, 26, 27
-    };
-
-    return StackTestHelper<ArmnnType, T, 3>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo,
-        outputTensorInfo,
-        1U,
-        inputData,
-        outputExpectedData
-    );
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 5> Stack5dOutputTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    armnn::TensorInfo inputTensorInfo ({ 2, 2, 2, 3 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 2, 2, 2, 2, 3 }, ArmnnType);
-
-    std::vector<std::vector<T>> inputData;
-
-    inputData.push_back(
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24
-    });
-
-    inputData.push_back(
-    {
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36,
-
-
-        37, 38, 39,
-        40, 41, 42,
-
-        43, 44, 45,
-        46, 47, 48
-    });
-
-    std::vector<T> outputExpectedData =
-    {
-        1, 2, 3,
-        4, 5, 6,
-
-        7, 8, 9,
-        10, 11, 12,
-
-
-        25, 26, 27,
-        28, 29, 30,
-
-        31, 32, 33,
-        34, 35, 36,
-
-
-
-        13, 14, 15,
-        16, 17, 18,
-
-        19, 20, 21,
-        22, 23, 24,
-
-
-        37, 38, 39,
-        40, 41, 42,
-
-        43, 44, 45,
-        46, 47, 48
-    };
-
-    return StackTestHelper<ArmnnType, T, 5>(
-        workloadFactory,
-        memoryManager,
-        inputTensorInfo,
-        outputTensorInfo,
-        1U,
-        inputData,
-        outputExpectedData
-    );
-}
+#include <backendsCommon/test/layerTests/TransposeConvolution2dTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/SoftmaxTestImpl.hpp b/src/backends/backendsCommon/test/SoftmaxTestImpl.hpp
deleted file mode 100644 (file)
index 983a53b..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "QuantizeHelper.hpp"
-#include "WorkloadTestUtils.hpp"
-
-#include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
-
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-
-#include <test/TensorHelpers.hpp>
-
-#include <algorithm>
-
-template<armnn::DataType ArmnnType, std::size_t n, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, n> SimpleSoftmaxBaseTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta,
-    const armnn::TensorShape& inputShape,
-    const std::vector<float>& outputData,
-    const std::vector<float>& inputData,
-    int axis = 1)
-{
-    const float qScale = 1.f / 256.f;
-    const int qOffset = 0;
-
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-
-    inputTensorInfo = armnn::TensorInfo(inputShape, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(qScale);
-    inputTensorInfo.SetQuantizationOffset(qOffset);
-
-    outputTensorInfo = armnn::TensorInfo(inputShape, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(qScale);
-    outputTensorInfo.SetQuantizationOffset(qOffset);
-
-    LayerTestResult<T, n> ret(outputTensorInfo);
-
-    // Each row is independently softmax'd.
-    auto input = MakeTensor<T, n>(inputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(qScale, qOffset, inputData)));
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::SoftmaxQueueDescriptor data;
-    data.m_Parameters.m_Beta = beta;
-    data.m_Parameters.m_Axis = axis;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info);
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-    CopyDataToITensorHandle(inputHandle.get(), input.origin());
-
-    BOOST_ASSERT(workload);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
-
-    std::vector<T> expectedOutput = std::vector<T>(
-            QuantizedVector<T>(qScale, qOffset, outputData));
-    ret.outputExpected = MakeTensor<T, n>(outputTensorInfo, expectedOutput);
-
-    return ret;
-}
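
The expected outputs fed into SimpleSoftmaxBaseTestImpl follow the beta-scaled softmax, out_i = exp(beta * x_i) / sum_j exp(beta * x_j), computed independently along the softmax axis. A single-row float sketch with the usual max-subtraction for numerical stability (illustrative helper):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Illustrative reference softmax over one row; max-subtraction does not change the result.
    std::vector<float> SoftmaxRow(const std::vector<float>& row, float beta)
    {
        const float maxVal = *std::max_element(row.begin(), row.end());
        std::vector<float> out(row.size());
        float sum = 0.0f;
        for (std::size_t i = 0; i < row.size(); ++i)
        {
            out[i] = std::exp(beta * (row[i] - maxVal));
            sum += out[i];
        }
        for (float& v : out)
        {
            v /= sum;
        }
        return out;
    }

SimpleSoftmaxTestImpl below hard-codes exactly this computation for its two input rows.
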
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> SimpleSoftmaxTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta)
-{
-    using std::exp;
-    const armnn::TensorShape inputShape{ 2, 4 };
-
-    float x0[4] = { exp((0.f - 1.0f) * beta), exp((1.0f - 1.0f) * beta),
-                    exp((0.0f - 1.0f) * beta), exp((0.0f - 1.0f) * beta) };
-    float sum0 = x0[0] + x0[1] + x0[2] + x0[3];
-    float x1[4] = { exp((0.5f - 0.5f) * beta), exp((0.0f - 0.5f) * beta),
-                    exp((0.0f - 0.5f) * beta), exp((0.0f - 0.5f) * beta) };
-    float sum1 = x1[0] + x1[1] + x1[2] + x1[3];
-
-    const std::vector<float> outputData = { x0[0] / sum0, x0[1] / sum0, x0[2] / sum0, x0[3] / sum0,
-                                            x1[0] / sum1, x1[1] / sum1, x1[2] / sum1, x1[3] / sum1 };
-
-    const std::vector<float> inputData =
-            {
-                0.f, 1.f, 0.f, 0.f,
-                .5f, 0.f, 0.f, 0.f,
-            };
-
-    return SimpleSoftmaxBaseTestImpl<ArmnnType, 2>(workloadFactory, memoryManager, beta,
-                                                   inputShape, outputData, inputData);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> SimpleSoftmaxTestImpl(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        float beta,
-        int axis)
-{
-    armnn::TensorShape inputShape;
-    std::vector<float> inputData;
-    std::vector<float> outputData;
-    switch (axis)
-    {
-    case -2:
-    case 0:
-        {
-        inputShape = {5, 2};
-
-        inputData =
-                {
-                        17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
-                };
-
-        outputData =
-                {
-                        0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
-                        0.087144312427294f,
-                        0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
-                        7.246299848982885e-08f
-                };
-        break;
-        }
-    case -1:
-    case 1:
-        {
-        inputShape = {2, 5};
-
-        inputData =
-                {
-                        17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
-                };
-
-        outputData =
-                {
-                        0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                        7.246299848982885e-08f,
-                        0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
-                        7.246299848982885e-08f
-                };
-        break;
-        }
-    }
-    return SimpleSoftmaxBaseTestImpl<ArmnnType, 2>(workloadFactory, memoryManager, beta,
-                                                   inputShape, outputData, inputData, axis);
-}
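
Note the fall-through cases: axis -2 behaves like 0 and axis -1 like 1, because a negative axis counts back from the end of the shape. The normalisation is the usual one (sketch, assuming a rank-2 input):

    // Illustrative: map a possibly negative softmax axis onto [0, rank).
    int NormalizeAxis(int axis, int rank)
    {
        return axis < 0 ? axis + rank : axis;
    }
    // e.g. NormalizeAxis(-2, 2) == 0 and NormalizeAxis(-1, 2) == 1.
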
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Simple3dSoftmaxTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta,
-    const armnn::TensorShape& inputShape,
-    const std::vector<float>& outputData,
-    const std::vector<float>& inputData,
-    int axis = 1)
-{
-    return SimpleSoftmaxBaseTestImpl<ArmnnType, 3>(workloadFactory, memoryManager, beta,
-                                                   inputShape, outputData, inputData, axis);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Simple4dSoftmaxTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    float beta,
-    const armnn::TensorShape& inputShape,
-    const std::vector<float>& outputData,
-    const std::vector<float>& inputData,
-    int axis = 1)
-{
-    return SimpleSoftmaxBaseTestImpl<ArmnnType, 4>(workloadFactory, memoryManager, beta,
-                                                   inputShape, outputData, inputData, axis);
-}
-
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> CompareSoftmaxTestImpl(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        armnn::IWorkloadFactory& refWorkloadFactory,
-        float beta)
-{
-    const int batchSize = 20;
-    const int channels = 30;
-
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-
-    unsigned int inputShape[] = { batchSize, channels };
-
-    inputTensorInfo = armnn::TensorInfo(2, inputShape, ArmnnType);
-    outputTensorInfo = armnn::TensorInfo(2, inputShape, ArmnnType);
-    float qScale = 1.f / 256.f;
-    int qOffset = 0;
-    inputTensorInfo.SetQuantizationScale(qScale);
-    inputTensorInfo.SetQuantizationOffset(qOffset);
-    outputTensorInfo.SetQuantizationScale(qScale);
-    outputTensorInfo.SetQuantizationOffset(qOffset);
-
-    LayerTestResult<T, 2> ret(outputTensorInfo);
-    auto input = MakeRandomTensor<T, 2>(inputTensorInfo, 0xF00D, 0.0f, 1.0f);
-
-    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
-
-    armnn::SoftmaxQueueDescriptor data;
-    data.m_Parameters.m_Beta = beta;
-
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
-    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
-    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
-
-    armnn::SoftmaxQueueDescriptor refData = data;
-    armnn::WorkloadInfo refInfo = info;
-    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
-    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
-
-    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info);
-    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateSoftmax(refData, refInfo);
-
-    outputHandleRef->Allocate();
-    inputHandleRef->Allocate();
-
-    inputHandle->Allocate();
-    outputHandle->Allocate();
-
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
-    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]);
-
-    ExecuteWorkload(*workload, memoryManager);
-
-    workloadRef->Execute();
-
-    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
-    CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get());
-
-    return ret;
-}
@@ -2,21 +2,22 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "ActivationFixture.hpp"
-#include "QuantizeHelper.hpp"
+#include "ActivationTestImpl.hpp"
+
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/test/ActivationFixture.hpp>
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
+#include <boost/multi_array.hpp>
+
 #include <algorithm>
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
@@ -463,7 +464,8 @@ LayerTestResult<T, 4> SimpleSigmoidTestCommon(
     float qScale,
     int32_t qOffset)
 {
-    std::vector<float> inputData = {
+    std::vector<float> inputData =
+    {
         -0.1f, -0.2f, -0.3f, -0.4f,
         0.1f,  0.2f,  0.3f,  0.4f,
         -1.0f, -2.0f, -3.0f, -4.0f,
diff --git a/src/backends/backendsCommon/test/layerTests/ActivationTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ActivationTestImpl.hpp
new file mode 100644 (file)
index 0000000..fc69cfb
--- /dev/null
@@ -0,0 +1,220 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+//
+// Sigmoid
+//
+
+LayerTestResult<float, 4> SimpleSigmoidTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SimpleSigmoidInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// TanH
+//
+
+LayerTestResult<float, 4> TanhTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> TanhUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> TanhInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// Linear
+//
+
+LayerTestResult<float, 4> ConstantLinearActivationTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> ConstantLinearActivationInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// ReLu
+//
+
+LayerTestResult<float, 4> ReLuTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> ReLuUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> ReLuInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// BoundedReLu
+//
+
+LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float upperBound);
+
+LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float upperBound,
+    float lowerBound);
+
+LayerTestResult<int16_t, 4> BoundedReLuInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> CompareBoundedReLuTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    float upperBound,
+    float lowerBound);
+
+//
+// SoftReLu
+//
+
+LayerTestResult<float, 4> SoftReLuTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SoftReLuUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SoftReLuInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// LeakyReLu
+//
+
+LayerTestResult<float, 4> LeakyReLuTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> LeakyReLuUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> LeakyReLuInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// Abs
+//
+
+LayerTestResult<float, 4> AbsTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> AbsUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> AbsInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// Sqrt
+//
+
+LayerTestResult<float, 4> SqrtTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SqrtUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SqrtInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// Square
+//
+
+LayerTestResult<float, 4> SquareTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SquareUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SquareInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// Other
+//
+
+LayerTestResult<float, 4> CompareActivationTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::ActivationFunction f,
+    unsigned int batchSize);
+
+LayerTestResult<uint8_t, 4> CompareActivationUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::ActivationFunction f);
+
+LayerTestResult<int16_t, 4> CompareActivationInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        armnn::IWorkloadFactory& refWorkloadFactory,
+        armnn::ActivationFunction f);
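
These free-function declarations are what the per-backend test suites link against; in ArmNN they are typically wired into Boost.Test via the ARMNN_AUTO_TEST_CASE macro. A sketch of such a registration (the backend file and chosen test names are illustrative):

    // Illustrative registration in a backend test file such as RefLayerTests.cpp.
    ARMNN_AUTO_TEST_CASE(SimpleSigmoid,      SimpleSigmoidTest)
    ARMNN_AUTO_TEST_CASE(SimpleSigmoidUint8, SimpleSigmoidUint8Test)
    ARMNN_AUTO_TEST_CASE(Tanh,               TanhTest)
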
diff --git a/src/backends/backendsCommon/test/layerTests/BatchNormalizationTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/BatchNormalizationTestImpl.cpp
new file mode 100644 (file)
index 0000000..d8f87e1
--- /dev/null
@@ -0,0 +1,566 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchNormalizationTestImpl.hpp"
+
+#include <DataLayoutIndexed.hpp>
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchNormTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorShape& inputOutputTensorShape,
+    const std::vector<float>& inputValues,
+    const std::vector<float>& expectedOutputValues,
+    float qScale,
+    int32_t qOffset,
+    armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, ArmnnType);
+
+    armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
+
+    armnn::TensorInfo tensorInfo({ inputOutputTensorShape[dataLayoutIndexed.GetChannelsIndex()] },
+                                 ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        tensorInfo.SetQuantizationScale(qScale);
+        tensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo,
+                                        QuantizedVector<T>(qScale, qOffset, inputValues));
+
+    // These values are per-channel of the input.
+    auto mean     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
+    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4,  9}));
+    auto beta     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3,  2}));
+    auto gamma    = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2,  1}));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    result.outputExpected = MakeTensor<T, 4>(inputTensorInfo,
+                                             QuantizedVector<T>(qScale, qOffset, expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+    armnn::BatchNormalizationQueueDescriptor descriptor;
+    descriptor.m_Mean                    = &meanTensor;
+    descriptor.m_Variance                = &varianceTensor;
+    descriptor.m_Beta                    = &betaTensor;
+    descriptor.m_Gamma                   = &gammaTensor;
+    descriptor.m_Parameters.m_Eps        = 0.0f;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    armnn::WorkloadInfo info;
+
+    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
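+
+// Worked example of the computation above (m_Eps is 0 in these tests):
+//     out = gamma * (in - mean) / sqrt(variance + eps) + beta
+// Channel 0 (mean 3, variance 4, beta 3, gamma 2):
+//     out = 2 * (in - 3) / 2 + 3 = in, i.e. channel 0 passes through unchanged.
+// Channel 1 (mean -2, variance 9, beta 2, gamma 1):
+//     out = (in + 2) / 3 + 2, e.g. in = 1 -> 3, in = 4 -> 4, in = -2 -> 2.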
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T,4> BatchNormTestNhwcImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    const unsigned int width    = 2;
+    const unsigned int height   = 3;
+    const unsigned int channels = 2;
+    const unsigned int num      = 1;
+
+    armnn::TensorInfo inputTensorInfo({num, height, width, channels}, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({num, height, width, channels}, ArmnnType);
+    armnn::TensorInfo tensorInfo({channels}, ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        tensorInfo.SetQuantizationScale(qScale);
+        tensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset,
+        {
+            1.f, 1.f, 4.f, 1.f,
+            4.f, 4.f, 2.f, 1.f,
+            1.f, -2.f, 6.f, 4.f
+        }));
+    // These values are per-channel of the input.
+    auto mean     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
+    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4, 9}));
+    auto beta     = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, 2}));
+    auto gamma    = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2, 1}));
+    LayerTestResult<T,4> ret(outputTensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::BatchNormalizationQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Mean             = &meanTensor;
+    data.m_Variance         = &varianceTensor;
+    data.m_Beta             = &betaTensor;
+    data.m_Gamma            = &gammaTensor;
+    data.m_Parameters.m_Eps = 0.0f;
+    data.m_Parameters.m_DataLayout = armnn::DataLayout::NHWC;
+
+    // For each channel:
+    // subtract the mean, divide by the standard deviation (with an epsilon to avoid division by zero),
+    // multiply by gamma and add beta
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset,
+        {
+            1.f, 3.f, 4.f, 3.f,
+            4.f, 4.f, 2.f, 3.f,
+            1.f, 2.f, 6.f, 4.f
+        }));
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
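+
+// Note on the NHWC data above: the two channels are interleaved per pixel, so
+// the input row "1.f, 1.f, 4.f, 1.f" holds (w=0: c0=1, c1=1) and
+// (w=1: c0=4, c1=1). It is the same tensor as in the NCHW tests below, where
+// channel 0 is the plane {1, 4, 4, 2, 1, 6} and channel 1 is {1, 1, 4, 1, -2, 4}.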
+
+} // anonymous namespace
+
+LayerTestResult<float, 4> BatchNormFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // BatchSize: 1
+    // Channels: 2
+    // Height: 3
+    // Width: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+         1.f, 4.f,
+         4.f, 2.f,
+         1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+         1.f, 1.f,
+         4.f, 1.f,
+        -2.f, 4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+        1.f, 4.f,
+        4.f, 2.f,
+        1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+        3.f, 3.f,
+        4.f, 3.f,
+        2.f, 4.f
+    };
+
+    return BatchNormTestImpl<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        inputValues,
+        expectedOutputValues,
+        0.f,
+        0,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<float, 4> BatchNormFloatNhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // BatchSize: 1
+    // Height: 3
+    // Width: 2
+    // Channels: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2)
+        1.f,  1.f,
+        4.f,  1.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f,  4.f,
+        2.f,  1.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, -2.f,
+        6.f,  4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2)
+        1.f, 3.f,
+        4.f, 3.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f, 4.f,
+        2.f, 3.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, 2.f,
+        6.f, 4.f
+    };
+
+    return BatchNormTestImpl<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        inputValues,
+        expectedOutputValues,
+        0.f,
+        0,
+        armnn::DataLayout::NHWC);
+}
+
+LayerTestResult<uint8_t, 4> BatchNormUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // BatchSize: 1
+    // Channels: 2
+    // Height: 3
+    // Width: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+         1.f, 4.f,
+         4.f, 2.f,
+         1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+         1.f, 1.f,
+         4.f, 1.f,
+        -2.f, 4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+        1.f, 4.f,
+        4.f, 2.f,
+        1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+        3.f, 3.f,
+        4.f, 3.f,
+        2.f, 4.f
+    };
+
+    return BatchNormTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        inputValues,
+        expectedOutputValues,
+        1.f / 20.f,
+        50,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // BatchSize: 1
+    // Height: 3
+    // Width: 2
+    // Channels: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2)
+        1.f,  1.f,
+        4.f,  1.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f,  4.f,
+        2.f,  1.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, -2.f,
+        6.f,  4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2)
+        1.f, 3.f,
+        4.f, 3.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f, 4.f,
+        2.f, 3.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, 2.f,
+        6.f, 4.f
+    };
+
+    return BatchNormTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        inputValues,
+        expectedOutputValues,
+        1.f / 20.f,
+        50,
+        armnn::DataLayout::NHWC);
+}
+
+LayerTestResult<int16_t, 4> BatchNormInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // BatchSize: 1
+    // Channels: 2
+    // Height: 3
+    // Width: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+         1.f, 4.f,
+         4.f, 2.f,
+         1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+         1.f, 1.f,
+         4.f, 1.f,
+        -2.f, 4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+        1.f, 4.f,
+        4.f, 2.f,
+        1.f, 6.f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+        3.f, 3.f,
+        4.f, 3.f,
+        2.f, 4.f
+    };
+
+    return BatchNormTestImpl<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        inputValues,
+        expectedOutputValues,
+        1.f / 20.f,
+        50,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<int16_t, 4> BatchNormInt16NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // BatchSize: 1
+    // Height: 3
+    // Width: 2
+    // Channels: 2
+
+    const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
+    std::vector<float> inputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2)
+        1.f,  1.f,
+        4.f,  1.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f,  4.f,
+        2.f,  1.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, -2.f,
+        6.f,  4.f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Height 0, Width (2) x Channel (2)
+        1.f, 3.f,
+        4.f, 3.f,
+
+        // Batch 0, Height 1, Width (2) x Channel (2)
+        4.f, 4.f,
+        2.f, 3.f,
+
+        // Batch 0, Height 2, Width (2) x Channel (2)
+        1.f, 2.f,
+        6.f, 4.f
+    };
+
+    return BatchNormTestImpl<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        inputValues,
+        expectedOutputValues,
+        1.f / 20.f,
+        50,
+        armnn::DataLayout::NHWC);
+}
+
+LayerTestResult<float,4> CompareBatchNormTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory)
+{
+    const unsigned int width     = 2;
+    const unsigned int height    = 3;
+    const unsigned int channels  = 5;
+    const unsigned int batchSize = 3;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+    armnn::TensorInfo tensorInfo;
+
+    constexpr unsigned int shape[]       = {batchSize, channels, height, width};
+    constexpr unsigned int tensorShape[] = {channels};
+
+    inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
+    tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32);
+
+    auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312);
+
+    auto mean     = MakeRandomTensor<float, 1>(tensorInfo, 123);
+    auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f);
+    auto beta     = MakeRandomTensor<float, 1>(tensorInfo, 123);
+    auto gamma    = MakeRandomTensor<float, 1>(tensorInfo, 345);
+
+    LayerTestResult<float,4> ret(outputTensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::BatchNormalizationQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Mean             = &meanTensor;
+    data.m_Variance         = &varianceTensor;
+    data.m_Beta             = &betaTensor;
+    data.m_Gamma            = &gammaTensor;
+    data.m_Parameters.m_Eps = 0.01f;
+
+    armnn::BatchNormalizationQueueDescriptor refData = data;
+    armnn::WorkloadInfo refInfo = info;
+    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
+    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    inputHandleRef->Allocate();
+    outputHandleRef->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+    workloadRef->PostAllocationConfigure();
+    workloadRef->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
+
+    return ret;
+}
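+
+// A hypothetical usage sketch (factory set-up is backend specific; the name
+// backendFactory below is a placeholder, not part of this file):
+//
+//     armnn::RefWorkloadFactory refFactory;
+//     LayerTestResult<float, 4> res =
+//         CompareBatchNormTest(backendFactory, nullptr, refFactory);
+//     BOOST_TEST(CompareTensors(res.output, res.outputExpected));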
diff --git a/src/backends/backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp
new file mode 100644 (file)
index 0000000..200e5d8
--- /dev/null
@@ -0,0 +1,40 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> BatchNormFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> BatchNormFloatNhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BatchNormUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> BatchNormInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> BatchNormInt16NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> CompareBatchNormTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory);
diff --git a/src/backends/backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp
new file mode 100644 (file)
index 0000000..67e7cc5
--- /dev/null
@@ -0,0 +1,473 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType,
+        std::size_t InputDim,
+        std::size_t OutputDim,
+        typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, OutputDim> BatchToSpaceNdHelper(
+        armnn::IWorkloadFactory &workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout& dataLayout,
+        const unsigned int *inputShape,
+        const std::vector<float> &inputData,
+        const std::vector<unsigned int> &blockShape,
+        const std::vector<std::pair<unsigned int, unsigned int>> &crops,
+        const unsigned int *outputShape,
+        const std::vector<float> &outputData,
+        float scale = 1.0f,
+        int32_t offset = 0)
+{
+    armnn::TensorInfo inputTensorInfo(InputDim, inputShape, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, ArmnnType);
+
+    inputTensorInfo.SetQuantizationScale(scale);
+    inputTensorInfo.SetQuantizationOffset(offset);
+
+    outputTensorInfo.SetQuantizationScale(scale);
+    outputTensorInfo.SetQuantizationOffset(offset);
+
+    auto input = MakeTensor<T, InputDim>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
+
+    LayerTestResult<T, OutputDim> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, OutputDim>(outputTensorInfo,
+                                                     ConvertToDataType<ArmnnType>(outputData, outputTensorInfo));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::BatchToSpaceNdQueueDescriptor data;
+    data.m_Parameters.m_DataLayout = dataLayout;
+    data.m_Parameters.m_BlockShape = blockShape;
+    data.m_Parameters.m_Crops = crops;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchToSpaceNd(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
+
+} // anonymous namespace
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 2, 2, 1};
+    const unsigned int outputShape[] = {1, 4, 4, 1};
+
+    std::vector<float> input({
+                                     // Batch 0, Height 0, Width (2) x Channel (1)
+                                     1.0f, 3.0f,
+                                     // Batch 0, Height 1, Width (2) x Channel (1)
+                                     9.0f, 11.0f,
+
+
+                                     // Batch 1, Height 0, Width (2) x Channel (1)
+                                     2.0f, 4.0f,
+                                     // Batch 1, Height 1, Width (2) x Channel (1)
+                                     10.0f, 12.0f,
+
+
+                                     // Batch 2, Height 0, Width (2) x Channel (1)
+                                     5.0f, 7.0f,
+                                     // Batch 2, Height 1, Width (2) x Channel (1)
+                                     13.0f, 15.0f,
+
+                                     // Batch 3, Height 0, Width (2) x Channel (1)
+                                     6.0f, 8.0f,
+                                     // Batch 3, Height 1, Width (2) x Channel (1)
+                                     14.0f, 16.0f
+                             });
+
+    std::vector<float> expectedOutput({
+                                              1.0f,   2.0f,  3.0f,  4.0f,
+                                              5.0f,   6.0f,  7.0f,  8.0f,
+                                              9.0f,  10.0f, 11.0f,  12.0f,
+                                              13.0f, 14.0f, 15.0f,  16.0f
+                                      });
+
+    std::vector<unsigned int> blockShape {2, 2};
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
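+
+// Worked example of the mapping in the test above: with blockShape {2, 2} the
+// input batch index decomposes as (i * 2 + j), and
+//     output[0][h * 2 + i][w * 2 + j] = input[i * 2 + j][h][w]
+// So batch 0 {1, 3, 9, 11} fills the even-row/even-column positions of the
+// 4x4 output, batch 1 {2, 4, 10, 12} the even-row/odd-column ones, and so on,
+// yielding the row-major sequence 1..16.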
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 1};
+    const unsigned int outputShape[] = {1, 2, 2, 1};
+
+    std::vector<float> input({
+                                     // Batch 0, Height 0, Width (2) x Channel (1)
+                                     1.0f, 2.0f, 3.0f, 4.0f
+                             });
+
+    std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 3};
+    const unsigned int outputShape[] = {1, 2, 2, 3};
+
+    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f});
+
+    std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest4(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {8, 1, 3, 1};
+    const unsigned int outputShape[] = {2, 2, 4, 1};
+
+    std::vector<float> input({
+                                     0.0f, 1.0f, 3.0f,
+                                     0.0f, 9.0f, 11.0f,
+                                     0.0f, 2.0f, 4.0f,
+                                     0.0f, 10.0f, 12.0f,
+                                     0.0f, 5.0f, 7.0f,
+                                     0.0f, 13.0f, 15.0f,
+                                     0.0f, 6.0f, 8.0f,
+                                     0.0f, 14.0f, 16.0f
+                             });
+
+    std::vector<float> expectedOutput({
+                                              1.0f, 2.0f, 3.0f, 4.0f,
+                                              5.0f, 6.0f, 7.0f, 8.0f,
+                                              9.0f, 10.0f, 11.0f, 12.0f,
+                                              13.0f, 14.0f, 15.0f, 16.0f
+                                      });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {2, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
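+
+// In the test above the crops {{0, 0}, {2, 0}} act on the spatial dimensions:
+// the uncropped output would be 2 (batch) x 2 (height) x 6 (width), and
+// cropping 2 elements from the start of the width leaves {2, 2, 4, 1}. The
+// leading 0.0f of each input row is exactly what lands in the cropped columns.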
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest5(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 2, 2, 1};
+    const unsigned int outputShape[] = {1, 4, 4, 1};
+
+    std::vector<float> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+    std::vector<float> expectedOutput({1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager, armnn::DataLayout::NHWC, inputShape,
+                                                 input, blockShape, crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest6(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 1};
+    const unsigned int outputShape[] = {1, 2, 2, 1};
+
+    std::vector<float> input({
+                                     // Batch 0, Height 0, Width (2) x Channel (1)
+                                     1, 2, 3, 4
+                             });
+
+    std::vector<float> expectedOutput({1, 2, 3, 4});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNhwcTest7(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 3};
+    const unsigned int outputShape[] = {1, 2, 2, 3};
+
+    std::vector<float> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+
+    std::vector<float> expectedOutput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest1(
+        armnn::IWorkloadFactory &workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 3, 1, 1};
+    const unsigned int outputShape[] = {1, 3, 2, 2};
+
+    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f});
+
+    std::vector<float> expectedOutput({
+                                              // Batch 0, Channel 0, Height (2) x Width (2)
+                                              1.0f,  4.0f,
+                                              7.0f, 10.0f,
+
+                                              // Batch 0, Channel 1, Height (2) x Width (2)
+                                              2.0f,  5.0f,
+                                              8.0f, 11.0f,
+
+                                              // Batch 0, Channel 2, Height (2) x Width (2)
+                                              3.0f,  6.0f,
+                                              9.0f, 12.0f,
+                                      });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 1};
+    const unsigned int outputShape[] = {1, 1, 2, 2};
+
+    std::vector<float> input({
+                                     // Batch 0, Height 0, Width (2) x Channel (1)
+                                     1.0f, 2.0f, 3.0f, 4.0f
+                             });
+
+    std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 3, 1, 1};
+    const unsigned int outputShape[] = {1, 3, 2, 2};
+
+    std::vector<float> input({1.0f, 3.0f, 5.0f, 7.0f, 9.0f, 11.0f, 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f});
+
+    std::vector<float> expectedOutput({
+                                              // Batch 0, Channel 0, Height (2) x Width (2)
+                                              1.0f,  7.0f,
+                                              2.0f,  8.0f,
+
+                                              // Batch 0, Channel 1, Height (2) x Width (2)
+                                              3.0f,  9.0f,
+                                              4.0f, 10.0f,
+
+                                              // Batch 0, Channel 2, Height (2) x Width (2)
+                                              5.0f, 11.0f,
+                                              6.0f, 12.0f,
+                                      });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest4(
+        armnn::IWorkloadFactory &workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 3, 1, 1};
+    const unsigned int outputShape[] = {1, 3, 2, 2};
+
+    std::vector<float> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+
+    std::vector<float> expectedOutput({
+                                              // Batch 0, Channel 0, Height (2) x Width (2)
+                                              1,  4,
+                                              7, 10,
+
+                                              // Batch 0, Channel 1, Height (2) x Width (2)
+                                              2,  5,
+                                              8, 11,
+
+                                              // Batch 0, Channel 2, Height (2) x Width (2)
+                                              3,  6,
+                                              9, 12,
+                                      });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest5(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 1};
+    const unsigned int outputShape[] = {1, 1, 2, 2};
+
+    std::vector<float> input({
+                                     // Batch 0, Height 0, Width (2) x Channel (1)
+                                     1, 2, 3, 4
+                             });
+
+    std::vector<float> expectedOutput({1, 2, 3, 4});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest6(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {4, 3, 1, 1};
+    const unsigned int outputShape[] = {1, 3, 2, 2};
+
+    std::vector<float> input({1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12});
+
+    std::vector<float> expectedOutput({
+                                              // Batch 0, Channel 0, Height (2) x Width (2)
+                                              1,  7,
+                                              2,  8,
+
+                                              // Batch 0, Channel 1, Height (2) x Width (2)
+                                              3,  9,
+                                              4, 10,
+
+                                              // Batch 0, Channel 2, Height (2) x Width (2)
+                                              5, 11,
+                                              6, 12,
+                                      });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchToSpaceNdNchwTest7(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = {8, 1, 1, 3};
+    const unsigned int outputShape[] = {2, 1, 2, 4};
+
+    std::vector<float> input({
+                                     0, 1, 3, 0,  9, 11,
+                                     0, 2, 4, 0, 10, 12,
+                                     0, 5, 7, 0, 13, 15,
+                                     0, 6, 8, 0, 14, 16
+                             });
+
+    std::vector<float> expectedOutput({
+                                              1,  2,  3,  4,
+                                              5,  6,  7,  8,
+                                              9, 10, 11, 12,
+                                              13, 14, 15, 16
+                                      });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {2, 0}};
+
+    return BatchToSpaceNdHelper<ArmnnType, 4, 4>(workloadFactory, memoryManager,
+                                                 armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                                 crops, outputShape, expectedOutput);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/ConcatTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ConcatTestImpl.cpp
new file mode 100644 (file)
index 0000000..3cfbca8
--- /dev/null
@@ -0,0 +1,2786 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ConcatTestImpl.hpp"
+
+#include <Permute.hpp>
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+//
+// Helper functions and templates
+//
+
+armnn::OriginsDescriptor CreateDescriptorForConcat(
+    const std::vector<armnn::TensorInfo> & inputTensorInfos,
+    unsigned int concatDim)
+{
+    std::vector<armnn::TensorShape> shapes;
+    shapes.reserve(inputTensorInfos.size());
+    for (const armnn::TensorInfo& it: inputTensorInfos)
+    {
+        shapes.push_back(it.GetShape());
+    }
+
+    return armnn::CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), concatDim);
+}
+
+//
+// Concat is only supported for N and C dimensions for NCHW and the innermost dimension.
+// In case of <4 dimensions we need to make sure that the concat dimension is at least
+// the 3rd slowest iterating one or the innermost dimension.
+//
+
+bool NeedPermuteForConcat(
+    const std::vector<armnn::TensorInfo> & inputTensorInfos,
+    unsigned int concatDim)
+{
+    // See note above. Additionally we expect the input shapes to have the
+    // same number of dimensions.
+    unsigned int nDimensions = 0;
+
+    // Determine the number of dimensions, as well as sanity-check them
+    // against test implementation issues.
+    for (auto && tensorInfo : inputTensorInfos)
+    {
+        if (!nDimensions)
+        {
+            nDimensions = tensorInfo.GetShape().GetNumDimensions();
+        }
+        else
+        {
+            BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
+                "Input shapes must have the same number of dimensions");
+        }
+    }
+
+    return (nDimensions < 3 || (nDimensions == 3 && (nDimensions-concatDim) < 3 && (nDimensions-concatDim) != 1));
+}
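+
+// In other words: 1D and 2D inputs are always permuted; 3D inputs are permuted
+// only when concatenating along dimension 1 (for nDimensions == 3 the condition
+// reduces to concatDim == 1); 4D inputs are never permuted.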
+
+armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape)
+{
+    unsigned int numDims = inputShape.GetNumDimensions();
+    if (numDims >= 3)
+    {
+        // Nothing to do if the inputShape has at least 3 dimensions.
+        return inputShape;
+    }
+
+    std::vector<unsigned int> newDims(size_t(3), 1u);
+    unsigned int expandedBy = 3 - numDims;
+    for (unsigned int i=0; i<numDims; ++i)
+    {
+        newDims[expandedBy+i] = inputShape[i];
+    }
+    return armnn::TensorShape(3u, &newDims[0]);
+}
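+
+// For example, a 1D shape {5} becomes {1, 1, 5} and a 2D shape {5, 6} becomes
+// {1, 5, 6}: the original dimensions are kept as the fastest-iterating ones.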
+
+void Generate3dPermuteVectorForConcat(
+    unsigned int numDimensions,
+    unsigned int & concatDim,
+    std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations)
+{
+    BOOST_ASSERT_MSG(numDimensions <= 3,
+       "Only 1, 2 and 3 dimensional tensors are supported by this helper");
+    unsigned int expandedBy = 3 - numDimensions;
+    unsigned int expandedConcatAxis = concatDim + expandedBy;
+
+    if (expandedConcatAxis == 2)
+    {
+        concatDim = 0;
+        armnn::PermutationVector forwardPermutation({1, 2, 0});
+        armnn::PermutationVector reversePermutation({2, 0, 1});
+        permutations = std::make_pair(forwardPermutation, reversePermutation);
+    }
+    else if (expandedConcatAxis == 1)
+    {
+        concatDim = 0;
+        armnn::PermutationVector forwardPermutation({2, 0, 1});
+        armnn::PermutationVector reversePermutation({1, 2, 0});
+        permutations = std::make_pair(forwardPermutation, reversePermutation);
+    }
+    else
+    {
+        BOOST_ASSERT(expandedConcatAxis == 0);
+        concatDim = 0;
+    }
+}
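+
+// Example: for a 2D concat along dimension 1 the expanded 3D concat axis is 2,
+// so the forward permutation {1, 2, 0} maps a shape {A, B, C} to {C, A, B}
+// (source dimension i moves to position mappings[i]), putting the concat axis
+// first; the reverse permutation {2, 0, 1} restores the original order after
+// the concatenation.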
+
+template<typename T> void PermuteTensorData(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::PermutationVector& mappings,
+    armnn::TensorInfo & inputTensorInfo,
+    const T * inputData,
+    std::vector<T>& outputData)
+{
+    BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
+    if (inputData == nullptr)
+    {
+        // Nullptr is an error in the test. By returning without doing the permutation
+        // I expect the caller to fail the test. It still makes sense to report this as
+        // an assert for Debug builds.
+        return;
+    }
+
+    armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PermuteQueueDescriptor queueDescriptor;
+    queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings};
+    armnn::WorkloadInfo workloadInfo;
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputData);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    outputData.resize(outputTensorInfo.GetNumElements());
+    CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
+    inputTensorInfo = outputTensorInfo;
+}
+
+//
+// Permute the input tensors so we can do a supported concatenation.
+// Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions
+// at the front. Finally this function tells what the output shape
+// of the permuted concatenated tensor is going to be.
+//
+template<typename T> void PermuteInputsForConcat(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    std::vector<armnn::TensorInfo> & inputTensorInfos,
+    std::vector<T *> & inputData,
+    std::vector<std::vector<T>> & inputDataStorage,
+    armnn::PermutationVector & permuteVector,
+    unsigned int & concatDim,
+    armnn::TensorInfo & outputTensorInfo)
+{
+    BOOST_ASSERT_MSG(inputTensorInfos.size() > 1,
+        "Expecting more than one tensor to be concatenated here");
+
+    unsigned int numDims = 0;
+    unsigned int nthInput = 0;
+    const armnn::PermutationVector identity({0, 1, 2});
+
+    std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations =
+        std::make_pair(identity, identity);
+
+    inputDataStorage.resize(inputData.size());
+
+    for (auto && tensorInfo : inputTensorInfos)
+    {
+        if (numDims == 0)
+        {
+            numDims = tensorInfo.GetShape().GetNumDimensions();
+            Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);
+
+            // Store the reverse permutation.
+            permuteVector = permutations.second;
+            BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity),
+                "Test logic error, we don't need permutation, so we shouldn't arrive here");
+        }
+        else
+        {
+            BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
+                "All inputs must have the same number of dimensions");
+        }
+
+        armnn::TensorInfo newTensorInfo = tensorInfo;
+        newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));
+
+        PermuteTensorData<T>(workloadFactory,
+                             memoryManager,
+                             permutations.first,
+                             newTensorInfo,
+                             inputData[nthInput],
+                             inputDataStorage[nthInput]);
+
+        inputData[nthInput] = inputDataStorage[nthInput].data();
+        inputTensorInfos[nthInput] = newTensorInfo;
+
+        ++nthInput;
+    }
+
+    outputTensorInfo.SetShape(
+        armnnUtils::Permuted(
+            ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
+            permutations.first));
+}
+
+//
+// This is the pair of PermuteInputsForConcat(...) which permutes back
+// the output of the concatenation so we can check it against an expected
+// output.
+//
+template <typename T> void PermuteOutputForConcat(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorInfo & tensorInfo,
+    const armnn::PermutationVector & permuteVector,
+    std::unique_ptr<armnn::ITensorHandle> && inputDataHandle,
+    T * data)
+{
+    BOOST_ASSERT_MSG(data != nullptr, "data must not be null");
+    if (data == nullptr)
+    {
+        // Nullptr is an error in the test. By returning without doing the permutation
+        // I expect the caller to fail the test. It still makes sense to report this as
+        // an assert for Debug builds.
+        return;
+    }
+
+    armnn::TensorInfo resultTensorInfo = tensorInfo;
+    std::vector<T> inputData(tensorInfo.GetNumElements());
+    std::vector<T> outputData;
+
+    CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());
+
+    PermuteTensorData<T>(workloadFactory,
+                         memoryManager,
+                         permuteVector,
+                         resultTensorInfo,
+                         &inputData[0],
+                         outputData);
+
+    ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
+}
+
+template<typename T> void Concatenate(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig,
+    std::initializer_list<T *> inputsOrig,
+    const armnn::TensorInfo& outputTensorInfoOrig,
+    T * output,
+    unsigned int concatDim,
+    bool useSubtensor)
+{
+    BOOST_ASSERT_MSG(output != nullptr, "output must not be null");
+    if (output == nullptr)
+    {
+        // Nullptr is an error in the test. By returning without doing the concatenation
+        // I expect the caller to fail the test. It still makes sense to report this as
+        // an assert for Debug builds.
+        return;
+    }
+
+    // Saves a copy of the parameters which we might need to change.
+    std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
+    std::vector<T *> inputs            = inputsOrig;
+    armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig;
+
+    armnn::PermutationVector permuteVector{0, 1, 2};
+
+    // Holds and automatically releases memory for the reshaped input data.
+    std::vector<std::vector<T>> tmpInputDataStorage;
+
+    const size_t inputCount = inputTensorInfos.size();
+
+    bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);
+
+    if (needPermuteForConcat)
+    {
+        //
+        // We need to permute the inputs, because concatenation along
+        // the requested axis is not supported.
+        //
+        PermuteInputsForConcat<T>(workloadFactory,
+                                  memoryManager,
+                                  inputTensorInfos,
+                                  inputs,
+                                  tmpInputDataStorage,
+                                  permuteVector,
+                                  concatDim,
+                                  outputTensorInfo);
+    }
+
+    armnn::WorkloadInfo workloadInfo;
+
+    std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
+    inputHandles.reserve(inputCount);
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ConcatQueueDescriptor queueDescriptor;
+    armnn::OriginsDescriptor viewsDescriptor = CreateDescriptorForConcat(inputTensorInfos, concatDim);
+    queueDescriptor.m_Parameters = viewsDescriptor;
+
+    if (useSubtensor)
+    {
+        queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
+        for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
+        {
+            queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
+                viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
+        }
+
+        outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+        const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
+        for (unsigned int i = 0; i < inputCount; ++i)
+        {
+            const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i];
+            std::unique_ptr<armnn::ITensorHandle> inputHandle =
+                subTensorsSupported ?
+                    workloadFactory.CreateSubTensorHandle(*outputHandle,
+                                                          inputTensorInfo.GetShape(),
+                                                          queueDescriptor.m_ViewOrigins[i].m_Origin.data()) :
+                    workloadFactory.CreateTensorHandle(inputTensorInfo);
+
+            inputHandles.emplace_back(std::move(inputHandle));
+        }
+
+    }
+    else
+    {
+        for (unsigned int i = 0; i < inputCount; ++i)
+        {
+            std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfos[i]);
+            inputHandles.emplace_back(std::move(inputHandle));
+        }
+    }
+
+    for (unsigned int i = 0; i < inputCount; ++i)
+    {
+        AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
+    }
+
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(queueDescriptor, workloadInfo);
+
+    for (auto& inputHandle : inputHandles)
+    {
+        inputHandle->Allocate();
+    }
+
+    outputHandle->Allocate();
+
+    unsigned int nextInputId = 0;
+    for (auto& inputHandle : inputHandles)
+    {
+        CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
+        ++nextInputId;
+    }
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    if (needPermuteForConcat)
+    {
+        PermuteOutputForConcat<T>(workloadFactory,
+                                  memoryManager,
+                                  outputTensorInfo,
+                                  permuteVector,
+                                  std::move(outputHandle),
+                                  output);
+    }
+    else
+    {
+        CopyDataFromITensorHandle(output, outputHandle.get());
+    }
+}
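+
+// Note on the useSubtensor path above: when the backend supports sub-tensors,
+// each input handle is created as a view into the output tensor at its view
+// origin, so the input data already lives in the right place inside the output
+// buffer; otherwise each input gets an independent tensor handle and the
+// concat workload copies the data.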
+
+//
+// Implementation templates
+//
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 1> Concat1dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f }));
+    auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f }));
+    auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f }));
+
+    armnn::TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 1> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   0,
+                   true);
+
+    result.output = MakeTensor<T, 1>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Concat2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorInfo& outputTensorInfo,
+    unsigned int dimension,
+    const float qScale,
+    const int32_t qOffset)
+{
+    armnn::TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f,
+    }));
+
+    auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        4.0f, 5.0f, 6.0f,
+
+        // Batch 1
+        13.0f, 14.0f, 15.0f,
+    }));
+
+    auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        7.0f, 8.0f, 9.0f,
+
+        // Batch 1
+        16.0f, 17.0f, 18.0f,
+    }));
+
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   true);
+
+    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
+    return result;
+}
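+
+// Note: the generic Concat2d/3d/4d helpers only fill in result.output; each
+// dimension-specific wrapper below sets result.outputExpected itself.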
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Concat2dDim0TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 0, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f,
+
+        // Batch 2
+        4.0f, 5.0f, 6.0f,
+
+        // Batch 3
+        13.0f, 14.0f, 15.0f,
+
+        // Batch 4
+        7.0f, 8.0f, 9.0f,
+
+        // Batch 5
+        16.0f, 17.0f, 18.0f,
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Concat2dDim1TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 1, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Concat2dDim0DiffInputDimsTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
+    auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f,
+    }));
+
+    armnn::TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
+    auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        4.0f, 5.0f, 6.0f,
+
+        // Batch 1
+        13.0f, 14.0f, 15.0f,
+
+        // Batch 2
+        7.0f, 8.0f, 9.0f,
+    }));
+
+    armnn::TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
+    auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        16.0f, 17.0f, 18.0f,
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   0,
+                   true);
+
+    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f,
+
+        // Batch 2
+        4.0f, 5.0f, 6.0f,
+
+        // Batch 3
+        13.0f, 14.0f, 15.0f,
+
+        // Batch 4
+        7.0f, 8.0f, 9.0f,
+
+        // Batch 5
+        16.0f, 17.0f, 18.0f,
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Concat2dDim1DiffInputDimsTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
+    auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f,
+    }));
+
+    armnn::TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
+    auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+
+        // Batch 1
+        13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+    }));
+
+    armnn::TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
+    auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        9.0f,
+
+        // Batch 1
+        18.0f
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   1,
+                   true);
+
+    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+
+        // Batch 1
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
+    }));
+
+    return result;
+}
+
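+// From 3D upwards the tests also take a useSubtensor flag: when it is set and
+// the workload factory supports sub-tensors, Concatenate writes each input
+// through a sub-tensor view of the output rather than a standalone handle.
+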
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorInfo& outputTensorInfo,
+    unsigned int dimension,
+    bool useSubtensor,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f
+    }));
+
+    auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        7.0f, 8.0f,
+
+        // Batch 0, Channel 1
+        9.0f, 10.0f,
+
+        // Batch 0, Channel 2
+        11.0f, 12.0f,
+
+        // Batch 1, Channel 0
+        25.0f, 26.0f,
+
+        // Batch 1, Channel 1
+        27.0f, 28.0f,
+
+        // Batch 1, Channel 2
+        29.0f, 30.0f
+    }));
+
+    auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        13.0f, 14.0f,
+
+        // Batch 0, Channel 1
+        15.0f, 16.0f,
+
+        // Batch 0, Channel 2
+        17.0f, 18.0f,
+
+        // Batch 1, Channel 0
+        31.0f, 32.0f,
+
+        // Batch 1, Channel 1
+        33.0f, 34.0f,
+
+        // Batch 1, Channel 2
+        35.0f, 36.0f
+    }));
+
+    LayerTestResult<T, 3> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   useSubtensor);
+
+    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dDim0TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 0, true, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f,
+
+        // Batch 2, Channel 0
+        7.0f, 8.0f,
+
+        // Batch 2, Channel 1
+        9.0f, 10.0f,
+
+        // Batch 2, Channel 2
+        11.0f, 12.0f,
+
+        // Batch 3, Channel 0
+        25.0f, 26.0f,
+
+        // Batch 3, Channel 1
+        27.0f, 28.0f,
+
+        // Batch 3, Channel 2
+        29.0f, 30.0f,
+
+        // Batch 4, Channel 0
+        13.0f, 14.0f,
+
+        // Batch 4, Channel 1
+        15.0f, 16.0f,
+
+        // Batch 4, Channel 2
+        17.0f, 18.0f,
+
+        // Batch 5, Channel 0
+        31.0f, 32.0f,
+
+        // Batch 5, Channel 1
+        33.0f, 34.0f,
+
+        // Batch 5, Channel 2
+        35.0f, 36.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dDim1TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 1, true, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 0, Channel 3
+        7.0f, 8.0f,
+
+        // Batch 0, Channel 4
+        9.0f, 10.0f,
+
+        // Batch 0, Channel 5
+        11.0f, 12.0f,
+
+        // Batch 0, Channel 6
+        13.0f, 14.0f,
+
+        // Batch 0, Channel 7
+        15.0f, 16.0f,
+
+        // Batch 0, Channel 8
+        17.0f, 18.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f,
+
+        // Batch 1, Channel 3
+        25.0f, 26.0f,
+
+        // Batch 1, Channel 4
+        27.0f, 28.0f,
+
+        // Batch 1, Channel 5
+        29.0f, 30.0f,
+
+        // Batch 1, Channel 6
+        31.0f, 32.0f,
+
+        // Batch 1, Channel 7
+        33.0f, 34.0f,
+
+        // Batch 1, Channel 8
+        35.0f, 36.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dDim2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 2, useSubtensor, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dDim0DiffInputDimsTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
+    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+            // Batch 0, Channel 0
+            1.0f, 2.0f,
+
+            // Batch 0, Channel 1
+            3.0f, 4.0f,
+
+            // Batch 0, Channel 2
+            5.0f, 6.0f,
+
+            // Batch 1, Channel 0
+            19.0f, 20.0f,
+
+            // Batch 1, Channel 1
+            21.0f, 22.0f,
+
+            // Batch 1, Channel 2
+            23.0f, 24.0f
+    }));
+
+    armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType, qScale, qOffset);
+    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+            // Batch 0, Channel 0
+            7.0f, 8.0f,
+
+            // Batch 0, Channel 1
+            9.0f, 10.0f,
+
+            // Batch 0, Channel 2
+            11.0f, 12.0f,
+    }));
+
+    armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType, qScale, qOffset);
+    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+            // Batch 0, Channel 0
+            25.0f, 26.0f,
+
+            // Batch 0, Channel 1
+            27.0f, 28.0f,
+
+            // Batch 0, Channel 2
+            29.0f, 30.0f,
+
+            // Batch 1, Channel 0
+            13.0f, 14.0f,
+
+            // Batch 1, Channel 1
+            15.0f, 16.0f,
+
+            // Batch 1, Channel 2
+            17.0f, 18.0f,
+
+            // Batch 2, Channel 0
+            31.0f, 32.0f,
+
+            // Batch 2, Channel 1
+            33.0f, 34.0f,
+
+            // Batch 2, Channel 2
+            35.0f, 36.0f
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
+    LayerTestResult<T, 3> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   0,
+                   true);
+
+    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f,
+
+        // Batch 2, Channel 0
+        7.0f, 8.0f,
+
+        // Batch 2, Channel 1
+        9.0f, 10.0f,
+
+        // Batch 2, Channel 2
+        11.0f, 12.0f,
+
+        // Batch 3, Channel 0
+        25.0f, 26.0f,
+
+        // Batch 3, Channel 1
+        27.0f, 28.0f,
+
+        // Batch 3, Channel 2
+        29.0f, 30.0f,
+
+        // Batch 4, Channel 0
+        13.0f, 14.0f,
+
+        // Batch 4, Channel 1
+        15.0f, 16.0f,
+
+        // Batch 4, Channel 2
+        17.0f, 18.0f,
+
+        // Batch 5, Channel 0
+        31.0f, 32.0f,
+
+        // Batch 5, Channel 1
+        33.0f, 34.0f,
+
+        // Batch 5, Channel 2
+        35.0f, 36.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dDim1DiffInputDimsTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
+    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f
+    }));
+
+    armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
+    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        7.0f, 8.0f,
+
+        // Batch 0, Channel 1
+        9.0f, 10.0f,
+
+        // Batch 0, Channel 2
+        11.0f, 12.0f,
+
+        // Batch 0, Channel 3
+        25.0f, 26.0f,
+
+        // Batch 1, Channel 0
+        27.0f, 28.0f,
+
+        // Batch 1, Channel 1
+        29.0f, 30.0f,
+
+        // Batch 1, Channel 2
+        13.0f, 14.0f,
+
+        // Batch 1, Channel 3
+        15.0f, 16.0f,
+    }));
+
+    armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
+    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        17.0f, 18.0f,
+
+        // Batch 1, Channel 0
+        31.0f, 32.0f,
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
+    LayerTestResult<T, 3> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   1,
+                   true);
+
+    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 0, Channel 3
+        7.0f, 8.0f,
+
+        // Batch 0, Channel 4
+        9.0f, 10.0f,
+
+        // Batch 0, Channel 5
+        11.0f, 12.0f,
+
+        // Batch 0, Channel 6
+        25.0f, 26.0f,
+
+        // Batch 0, Channel 7
+        17.0f, 18.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f,
+
+        // Batch 1, Channel 3
+        27.0f, 28.0f,
+
+        // Batch 1, Channel 4
+        29.0f, 30.0f,
+
+        // Batch 1, Channel 5
+        13.0f, 14.0f,
+
+        // Batch 1, Channel 6
+        15.0f, 16.0f,
+
+        // Batch 1, Channel 7
+        31.0f, 32.0f,
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Concat3dDim2DiffInputDimsTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
+    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f
+    }));
+
+    armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
+    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        7.0f,
+
+        // Batch 0, Channel 1
+        9.0f,
+
+        // Batch 0, Channel 2
+        11.0f,
+
+        // Batch 1, Channel 0
+        25.0f,
+
+        // Batch 1, Channel 1
+        27.0f,
+
+        // Batch 1, Channel 2
+        29.0f
+    }));
+
+    armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
+    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        13.0f, 14.0f, 50.0f,
+
+        // Batch 0, Channel 1
+        15.0f, 16.0f, 51.0f,
+
+        // Batch 0, Channel 2
+        17.0f, 18.0f, 52.0f,
+
+        // Batch 1, Channel 0
+        31.0f, 32.0f, 53.0f,
+
+        // Batch 1, Channel 1
+        33.0f, 34.0f, 54.0f,
+
+        // Batch 1, Channel 2
+        35.0f, 36.0f, 55.0f,
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
+    LayerTestResult<T, 3> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory, memoryManager,
+                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
+                   { input0.data(), input1.data(), input2.data() },
+                   outputTensorInfo,
+                   output.data(),
+                   2,
+                   useSubtensor);
+
+    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
+
+        // Batch 0, Channel 1
+        3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
+
+        // Batch 0, Channel 2
+        5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
+
+        // Batch 1, Channel 0
+        19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
+
+        // Batch 1, Channel 1
+        21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
+
+        // Batch 1, Channel 2
+        23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorInfo& outputTensorInfo,
+    unsigned int dimension,
+    bool useSubtensor,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f
+    }));
+
+    auto input1 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f
+    }));
+
+    auto input2 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f,
+        29.0f, 30.0f,
+        31.0f, 32.0f
+    }));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+
+    Concatenate<T>(workloadFactory,
+                   memoryManager,
+                   {inputTensorInfo, inputTensorInfo, inputTensorInfo},
+                   {input0.data(), input1.data(), input2.data()},
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   useSubtensor);
+
+    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDim0TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 0, true, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f,
+
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f,
+        29.0f, 30.0f,
+        31.0f, 32.0f
+    }));
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDim1TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 1, true, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f,
+
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f,
+        29.0f, 30.0f,
+        31.0f, 32.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDim2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    armnn::TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 2, true, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f,
+
+        9.0f, 10.0f,
+        11.0f, 12.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+        29.0f, 30.0f,
+        31.0f, 32.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDim3TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool useSubtensor)
+{
+    armnn::TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, outputTensorInfo, 3, useSubtensor, qScale, qOffset);
+
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        11.0f, 12.0f,
+        21.0f, 22.0f,
+        3.0f, 4.0f,
+        13.0f, 14.0f,
+        23.0f, 24.0f,
+
+        5.0f, 6.0f,
+        15.0f, 16.0f,
+        25.0f, 26.0f,
+        7.0f, 8.0f,
+        17.0f, 18.0f,
+        27.0f, 28.0f,
+
+        9.0f, 10.0f,
+        19.0f, 20.0f,
+        29.0f, 30.0f,
+        11.0f, 12.0f,
+        21.0f, 22.0f,
+        31.0f, 32.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDiffShapeDim0TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    unsigned int dimension = 0;
+    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f
+    }));
+
+    armnn::TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f,
+        29.0f, 30.0f,
+        31.0f, 32.0f
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory,
+                   memoryManager,
+                   {inputTensorInfo0, inputTensorInfo1},
+                   {input0.data(), input1.data()},
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   true);
+
+    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f,
+
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f,
+        29.0f, 30.0f,
+        31.0f, 32.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDiffShapeDim1TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    unsigned int dimension = 1;
+    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f
+    }));
+
+    armnn::TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory,
+                   memoryManager,
+                   {inputTensorInfo0, inputTensorInfo1},
+                   {input0.data(), input1.data()},
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   true);
+
+    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f,
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDiffShapeDim2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    unsigned int dimension = 2;
+    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f
+    }));
+
+    armnn::TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory,
+                   memoryManager,
+                   {inputTensorInfo0, inputTensorInfo1},
+                   {input0.data(), input1.data()},
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   true);
+
+    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        11.0f, 12.0f,
+        13.0f, 14.0f,
+        15.0f, 16.0f,
+
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        17.0f, 18.0f,
+        19.0f, 20.0f,
+        21.0f, 22.0f,
+
+        9.0f, 10.0f,
+        11.0f, 12.0f,
+        23.0f, 24.0f,
+        25.0f, 26.0f,
+        27.0f, 28.0f
+    }));
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Concat4dDiffShapeDim3TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool useSubtensor)
+{
+    unsigned int dimension = 3;
+    armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
+
+    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        5.0f, 6.0f,
+        7.0f, 8.0f,
+        9.0f, 10.0f,
+        11.0f, 12.0f
+    }));
+
+    armnn::TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
+
+    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
+        11.0f, 12.0f, 13.0f,
+        14.0f, 15.0f, 16.0f,
+
+        17.0f, 18.0f, 19.0f,
+        20.0f, 21.0f, 22.0f,
+
+        23.0f, 24.0f, 25.0f,
+        26.0f, 27.0f, 28.0f
+    }));
+
+    armnn::TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+
+    std::vector<T> output;
+    output.resize(outputTensorInfo.GetNumElements());
+    Concatenate<T>(workloadFactory,
+                   memoryManager,
+                   {inputTensorInfo0, inputTensorInfo1},
+                   {input0.data(), input1.data()},
+                   outputTensorInfo,
+                   output.data(),
+                   dimension,
+                   useSubtensor);
+
+    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
+        1.0f, 2.0f, 11.0f, 12.0f, 13.0f,
+        3.0f, 4.0f, 14.0f, 15.0f, 16.0f,
+        5.0f, 6.0f, 17.0f, 18.0f, 19.0f,
+        7.0f, 8.0f, 20.0f, 21.0f, 22.0f,
+        9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
+        11.0f, 12.0f, 26.0f, 27.0f, 28.0f
+    }));
+
+    return result;
+}
+
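+// The next test gives the inputs different quantization parameters from the
+// output, so the backend has to requantize while concatenating. In the output
+// space (scale 0.1, offset 20) an input1 element q maps to 5 * q - 5 and an
+// input2 element q maps to 2 * q; e.g. 1 -> 0 and 37 -> 74, which is how the
+// expected tensor below is derived.
+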
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> ConcatDifferentInputOutputQParamTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    // Defines the tensor descriptors.
+    armnn::TensorInfo outputTensorInfo({ 3, 6, 3 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo1({ 3, 6, 2 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo2({ 3, 6, 1 }, ArmnnType);
+
+    std::vector<armnn::TensorShape> inputTensorShapes({inputTensorInfo1.GetShape(), inputTensorInfo2.GetShape()});
+
+    // Quantized input1 tensor.
+    const float inputScale1 = 0.5f;
+    const int32_t inputOffset1 = 5;
+
+    auto input1 = MakeTensor<T, 3>(inputTensorInfo1, std::vector<T>(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27,
+        28, 29, 30,
+        31, 32, 33,
+        34, 35, 36
+    }));
+
+    // Quantized input2 tensor.
+    const float inputScale2 = 0.2f;
+    const int32_t inputOffset2 = 10;
+
+    auto input2 = MakeTensor<T, 3>(inputTensorInfo2, std::vector<T>(
+    {
+        37, 38, 39,
+        40, 41, 42,
+        43, 44, 45,
+        46, 47, 48,
+        49, 50, 51,
+        52, 53, 54
+    }));
+
+    // Quantized output tensor.
+    const float outputScale = 0.1f;
+    const int32_t outputOffset = 20;
+
+    LayerTestResult<T, 3> ret(outputTensorInfo);
+
+    ret.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(
+    {
+        0,   5,  74,
+        10,  15,  76,
+        20,  25,  78,
+        30,  35,  80,
+        40,  45,  82,
+        50,  55,  84,
+
+        60,  65,  86,
+        70,  75,  88,
+        80,  85,  90,
+        90,  95,  92,
+        100, 105,  94,
+        110, 115,  96,
+
+        120, 125,  98,
+        130, 135, 100,
+        140, 145, 102,
+        150, 155, 104,
+        160, 165, 106,
+        170, 175, 108
+    }));
+
+    outputTensorInfo.SetQuantizationScale(outputScale);
+    outputTensorInfo.SetQuantizationOffset(outputOffset);
+    inputTensorInfo1.SetQuantizationScale(inputScale1);
+    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
+    inputTensorInfo2.SetQuantizationScale(inputScale2);
+    inputTensorInfo2.SetQuantizationOffset(inputOffset2);
+
+    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by the size of input[0].
+    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
+
+    std::vector<unsigned int> wOrigin2 = { 0, 0, 2 }; // Extent of the window is defined by the size of input[1].
+    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    bool subTensorsSupported = useSubtensor && workloadFactory.SupportsSubTensors();
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
+            subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo1);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
+            subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo2);
+
+    armnn::ConcatQueueDescriptor data;
+    armnn::OriginsDescriptor desc = armnn::CreateDescriptorForConcatenation(
+            inputTensorShapes.begin(), inputTensorShapes.end(), 2);
+    data.m_Parameters = desc;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_ViewOrigins.push_back(window1);
+    data.m_ViewOrigins.push_back(window2);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
+
+    inputHandle1->Allocate();
+    inputHandle2->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
+    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+//
+// Explicit template instantiations
+//
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 3>
+ConcatDifferentInputOutputQParamTest<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 3>
+ConcatDifferentInputOutputQParamTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+//
+// Implementation functions
+//
+
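+// Non-template entry points, matching the declarations in ConcatTestImpl.hpp.
+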
+LayerTestResult<float,3> ConcatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    unsigned int outputWidth = 3;
+    unsigned int outputHeight = 6;
+    unsigned int outputChannels = 3;
+
+    unsigned int inputWidth1 = 3;
+    unsigned int inputHeight1 = 6;
+    unsigned int inputChannels1 = 2;
+
+    unsigned int inputWidth2 = 3;
+    unsigned int inputHeight2 = 6;
+    unsigned int inputChannels2 = 1;
+
+    // Define the tensor descriptors.
+    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
+    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
+    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);
+
+    LayerTestResult<float,3> ret(outputTensorInfo);
+
+    ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
+    {
+            1.0f, 2.0f, 3.0f,
+            4.0f, 5.0f, 6.0f,
+            7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f,
+            13.0f, 14.0f, 15.0f,
+            16.0f, 17.0f, 18.0f,
+
+            19.0f, 20.0f, 21.0f,
+            22.0f, 23.0f, 24.0f,
+            25.0f, 26.0f, 27.0f,
+            28.0f, 29.0f, 30.0f,
+            31.0f, 32.0f, 33.0f,
+            34.0f, 35.0f, 36.0f,
+
+            37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f,
+            43.0f, 44.0f, 45.0f,
+            46.0f, 47.0f, 48.0f,
+            49.0f, 50.0f, 51.0f,
+            52.0f, 53.0f, 54.0f,
+        })
+    );
+
+    auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
+        {
+            1.0f, 2.0f, 3.0f,
+            4.0f, 5.0f, 6.0f,
+            7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f,
+            13.0f, 14.0f, 15.0f,
+            16.0f, 17.0f, 18.0f,
+
+            19.0f, 20.0f, 21.0f,
+            22.0f, 23.0f, 24.0f,
+            25.0f, 26.0f, 27.0f,
+            28.0f, 29.0f, 30.0f,
+            31.0f, 32.0f, 33.0f,
+            34.0f, 35.0f, 36.0f,
+        })
+    );
+
+    auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
+        {
+            37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f,
+            43.0f, 44.0f, 45.0f,
+            46.0f, 47.0f, 48.0f,
+            49.0f, 50.0f, 51.0f,
+            52.0f, 53.0f, 54.0f,
+        })
+    );
+
+    std::vector<unsigned int> wOrigin1 = {0, 0, 0}; // Extent of the window is defined by the size of input[0].
+    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
+
+    std::vector<unsigned int> wOrigin2 = {2, 0, 0}; // Extent of the window is defined by the size of input[1].
+    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
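+    // input1 fills output channels [0, 2); wOrigin2 starts input2 at channel 2,
+    // immediately after it.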
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
+        subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo1);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle2  =
+        subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo2);
+
+    armnn::ConcatQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_ViewOrigins.push_back(window1);
+    data.m_ViewOrigins.push_back(window2);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
+
+    inputHandle1->Allocate();
+    inputHandle2->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
+    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
+
+    return ret;
+}
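+
+// Backend test suites typically register these entry points via the auto-test
+// macro, e.g. ARMNN_AUTO_TEST_CASE(SimpleConcat, ConcatTest); the test name
+// here is illustrative.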
+
+LayerTestResult<float, 1> Concat1dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat1dTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 2> Concat2dDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 2> Concat2dDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 2> Concat2dDim0DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim0DiffInputDimsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 2> Concat2dDim1DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim1DiffInputDimsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 3> Concat3dDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 3> Concat3dDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 3> Concat3dDim2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat3dDim2TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, useSubtensor, 0.0f, 0);
+}
+
+LayerTestResult<float, 3> Concat3dDim0DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim0DiffInputDimsTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 3> Concat3dDim1DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim1DiffInputDimsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 3> Concat3dDim2DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat3dDim2DiffInputDimsTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, useSubtensor, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDim2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDim2TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDim3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat4dDim3TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0, useSubtensor);
+}
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDiffShapeDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDiffShapeDim1TestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDiffShapeDim2TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat4dDiffShapeDim3TestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0, useSubtensor);
+}
+
+LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    unsigned int outputWidth = 3;
+    unsigned int outputHeight = 6;
+    unsigned int outputChannels = 3;
+
+    unsigned int inputWidth1 = 3;
+    unsigned int inputHeight1 = 6;
+    unsigned int inputChannels1 = 2;
+
+    unsigned int inputWidth2 = 3;
+    unsigned int inputHeight2 = 6;
+    unsigned int inputChannels2 = 1;
+
+    // Defines the tensor descriptors.
+    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
+
+    // Quantized input1 tensor. Range [-3, 1]
+    const float inputScale1 = 0.015686f;
+    const int32_t inputOffset1 = 192;
+
+    auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27,
+        28, 29, 30,
+        31, 32, 33,
+        34, 35, 36,
+    })
+    );
+
+    // Quantized input2 tensor. Range [-1, 4]
+    const float inputScale2 = 0.019608f;
+    const int32_t inputOffset2 = 50;
+
+    auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
+    {
+        37, 38, 39,
+        40, 41, 42,
+        43, 44, 45,
+        46, 47, 48,
+        49, 50, 51,
+        52, 53, 54,
+    })
+    );
+
+    // Output has the same quantization parameters as input1,
+    // so only the requantization of input2 is required.
+    const float outputScale = 0.015686f;
+    const int32_t outputOffset = 192;
+
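+    // input1 passes through unchanged since its quantization parameters match
+    // the output's; only input2 is requantized. For example, 37 dequantizes to
+    // (37 - 50) * 0.019608 = -0.254904 and requantizes to
+    // round(-0.254904 / 0.015686) + 192 = 176, the first value of the third
+    // output channel below.
+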
+    LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
+
+    ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27,
+        28, 29, 30,
+        31, 32, 33,
+        34, 35, 36,
+
+        176, 177, 178,
+        179, 181, 182,
+        183, 184, 186,
+        187, 188, 189,
+        191, 192, 193,
+        195, 196, 197,
+    })
+    );
+
+    outputTensorInfo.SetQuantizationScale(outputScale);
+    outputTensorInfo.SetQuantizationOffset(outputOffset);
+    inputTensorInfo1.SetQuantizationScale(inputScale1);
+    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
+    inputTensorInfo2.SetQuantizationScale(inputScale2);
+    inputTensorInfo2.SetQuantizationOffset(inputOffset2);
+
+    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
+    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
+
+    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by size of input[1].
+    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
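+    // input1 spans channels [0, 2) of the output, so input2's window starts at channel 2.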
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
+
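+    // When sub-tensors are supported, each input handle is created as a view into the
+    // output tensor at its window origin, so the backend can write the concatenation
+    // result in place instead of copying the two inputs into the output.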
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
+            subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo1);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
+            subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo2);
+
+    armnn::ConcatQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_ViewOrigins.push_back(window1);
+    data.m_ViewOrigins.push_back(window2);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
+
+    inputHandle1->Allocate();
+    inputHandle2->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
+    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+LayerTestResult<uint8_t, 3> ConcatUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    unsigned int outputWidth = 3;
+    unsigned int outputHeight = 6;
+    unsigned int outputChannels = 3;
+
+    unsigned int inputWidth1 = 3;
+    unsigned int inputHeight1 = 6;
+    unsigned int inputChannels1 = 2;
+
+    unsigned int inputWidth2 = 3;
+    unsigned int inputHeight2 = 6;
+    unsigned int inputChannels2 = 1;
+
+    // Defines the tensor descriptors.
+    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
+
+    // Arbitrary scale and offset. They don't really matter here, as the Concat operator
+    // performs no dequantization or requantization when all tensors share the same parameters.
+    const float scale = 0.13497836f;
+    const int32_t offset = -7;
+
+    outputTensorInfo.SetQuantizationScale(scale);
+    outputTensorInfo.SetQuantizationOffset(offset);
+    inputTensorInfo1.SetQuantizationScale(scale);
+    inputTensorInfo1.SetQuantizationOffset(offset);
+    inputTensorInfo2.SetQuantizationScale(scale);
+    inputTensorInfo2.SetQuantizationOffset(offset);
+
+    LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
+
+    ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
+        {
+            1, 2, 3,
+            4, 5, 6,
+            7, 8, 9,
+            10, 11, 12,
+            13, 14, 15,
+            16, 17, 18,
+
+            19, 20, 21,
+            22, 23, 24,
+            25, 26, 27,
+            28, 29, 30,
+            31, 32, 33,
+            34, 35, 36,
+
+            37, 38, 39,
+            40, 41, 42,
+            43, 44, 45,
+            46, 47, 48,
+            49, 50, 51,
+            52, 53, 54,
+        })
+    );
+
+    auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27,
+        28, 29, 30,
+        31, 32, 33,
+        34, 35, 36,
+    })
+    );
+
+    auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
+    {
+        37, 38, 39,
+        40, 41, 42,
+        43, 44, 45,
+        46, 47, 48,
+        49, 50, 51,
+        52, 53, 54,
+    })
+    );
+
+    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
+    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
+
+    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by size of input[1].
+    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
+        subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo1);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
+        subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo2);
+
+    armnn::ConcatQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_ViewOrigins.push_back(window1);
+    data.m_ViewOrigins.push_back(window2);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
+
+    inputHandle1->Allocate();
+    inputHandle2->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
+    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+LayerTestResult<uint16_t, 3> ConcatUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    unsigned int outputWidth = 3;
+    unsigned int outputHeight = 6;
+    unsigned int outputChannels = 3;
+
+    unsigned int inputWidth1 = 3;
+    unsigned int inputHeight1 = 6;
+    unsigned int inputChannels1 = 2;
+
+    unsigned int inputWidth2 = 3;
+    unsigned int inputHeight2 = 6;
+    unsigned int inputChannels2 = 1;
+
+    // Defines the tensor descriptors.
+    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedSymm16);
+    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedSymm16);
+    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedSymm16);
+
+    // Arbitrary scale and offset. They don't really matter here, as the Concat operator
+    // performs no dequantization or requantization when all tensors share the same parameters.
+    const float scale = 0.13497836f;
+    const int32_t offset = -7;
+
+    outputTensorInfo.SetQuantizationScale(scale);
+    outputTensorInfo.SetQuantizationOffset(offset);
+    inputTensorInfo1.SetQuantizationScale(scale);
+    inputTensorInfo1.SetQuantizationOffset(offset);
+    inputTensorInfo2.SetQuantizationScale(scale);
+    inputTensorInfo2.SetQuantizationOffset(offset);
+
+    LayerTestResult<uint16_t, 3> ret(outputTensorInfo);
+
+    ret.outputExpected = MakeTensor<uint16_t, 3>(outputTensorInfo, std::vector<uint16_t>(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27,
+        28, 29, 30,
+        31, 32, 33,
+        34, 35, 36,
+
+        37, 38, 39,
+        40, 41, 42,
+        43, 44, 45,
+        46, 47, 48,
+        49, 50, 51,
+        52, 53, 54,
+    }));
+
+    auto input1 = MakeTensor<uint16_t, 3>(inputTensorInfo1, std::vector<uint16_t>(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27,
+        28, 29, 30,
+        31, 32, 33,
+        34, 35, 36,
+    }));
+
+    auto input2 = MakeTensor<uint16_t, 3>(inputTensorInfo2, std::vector<uint16_t>(
+    {
+        37, 38, 39,
+        40, 41, 42,
+        43, 44, 45,
+        46, 47, 48,
+        49, 50, 51,
+        52, 53, 54,
+    }));
+
+    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
+    armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
+
+    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by size of input[1].
+    armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    bool subTensorsSupported = workloadFactory.SupportsSubTensors();
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
+            subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo1);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
+            subTensorsSupported ?
+            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
+            workloadFactory.CreateTensorHandle(inputTensorInfo2);
+
+    armnn::ConcatQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_ViewOrigins.push_back(window1);
+    data.m_ViewOrigins.push_back(window2);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
+
+    inputHandle1->Allocate();
+    inputHandle2->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
+    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+LayerTestResult<uint8_t, 1> Concat1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat1dTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concat2dDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concat2dDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concat2dDim0DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim0DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concat2dDim1DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat2dDim1DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concat3dDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concat3dDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concat3dDim2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat3dDim2TestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, useSubtensor, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concat3dDim0DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim0DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concat3dDim1DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat3dDim1DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concat3dDim2DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat3dDim2DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, useSubtensor, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDim2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDim2TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDim3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, bool useSubtensor)
+{
+    return Concat4dDim3TestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1, useSubtensor);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDiffShapeDim0TestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDiffShapeDim1TestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Concat4dDiffShapeDim2TestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor)
+{
+    return Concat4dDiffShapeDim3TestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5f, -1, useSubtensor);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/ConcatTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ConcatTestImpl.hpp
new file mode 100644 (file)
index 0000000..421d03a
--- /dev/null
@@ -0,0 +1,205 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> ConcatDifferentInputOutputQParamTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<float, 3> ConcatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> ConcatUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint16_t, 3> ConcatUint16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 1> Concat1dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> Concat2dDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> Concat2dDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> Concat2dDim0DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> Concat2dDim1DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Concat3dDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Concat3dDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Concat3dDim2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<float, 3> Concat3dDim0DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Concat3dDim1DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Concat3dDim2DiffInputDimsTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<float, 4> Concat4dDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Concat4dDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Concat4dDim2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Concat4dDim3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim0Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Concat4dDiffShapeDim3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<uint8_t, 4> Concat4dDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Concat4dDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Concat4dDim2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Concat4dDim3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<uint8_t, 1> Concat1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> Concat2dDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> Concat2dDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> Concat2dDim0DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> Concat2dDim1DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Concat3dDim0Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Concat3dDim1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Concat3dDim2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<uint8_t, 3> Concat3dDim0DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Concat3dDim1DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Concat3dDim2DiffInputDimsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<uint8_t, 3> ConcatDifferentInputOutputQParamUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
+
+LayerTestResult<int16_t, 3> ConcatDifferentInputOutputQParamInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool useSubtensor);
diff --git a/src/backends/backendsCommon/test/layerTests/ConstantTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ConstantTestImpl.cpp
new file mode 100644 (file)
index 0000000..c3cacd5
--- /dev/null
@@ -0,0 +1,154 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ConstantTestImpl.hpp"
+
+#include <Permute.hpp>
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ConstantTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    constexpr unsigned int inputWidth = 3;
+    constexpr unsigned int inputHeight = 4;
+    constexpr unsigned int inputChannels = 3;
+    constexpr unsigned int inputBatchSize = 2;
+
+    constexpr unsigned int outputWidth = inputWidth;
+    constexpr unsigned int outputHeight = inputHeight;
+    constexpr unsigned int outputChannels = inputChannels;
+    constexpr unsigned int outputBatchSize = inputBatchSize;
+
+    armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
+                                        ArmnnType, qScale, qOffset);
+
+    armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
+                                         ArmnnType, qScale, qOffset);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+        // Batch 0, Channel 0
+        235.0f,  46.0f, 178.0f,
+        100.0f, 123.0f,  19.0f,
+        172.0f,  74.0f, 250.0f,
+          6.0f, 195.0f,  80.0f,
+
+        // Batch 0, Channel 1
+        113.0f,  95.0f, 202.0f,
+         77.0f, 114.0f,  71.0f,
+        122.0f, 246.0f, 166.0f,
+         82.0f,  28.0f,  37.0f,
+
+        // Batch 0, Channel 2
+         56.0f, 170.0f, 162.0f,
+        194.0f,  89.0f, 254.0f,
+         12.0f, 209.0f, 200.0f,
+          1.0f,  64.0f,  54.0f,
+
+        // Batch 1, Channel 0
+         67.0f,  90.0f,  49.0f,
+          7.0f, 163.0f,  18.0f,
+         25.0f, 117.0f, 103.0f,
+        247.0f,  59.0f, 189.0f,
+
+        // Batch 1, Channel 1
+        239.0f, 104.0f, 199.0f,
+         17.0f, 124.0f, 153.0f,
+        222.0f, 217.0f, 75.0f,
+         32.0f, 126.0f, 21.0f,
+
+        // Batch 1, Channel 2
+         97.0f, 145.0f, 215.0f,
+        115.0f, 116.0f, 238.0f,
+        226.0f,  16.0f, 132.0f,
+         92.0f, 125.0f,  88.0f,
+    })));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = input;
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo);
+    AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]);
+
+    armnn::ConstantQueueDescriptor descriptor;
+    descriptor.m_LayerOutput = &constantTensor;
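+    // The Constant workload is expected to simply copy m_LayerOutput to its output
+    // tensor, which is why the expected output above is identical to the input.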
+
+    armnn::WorkloadInfo info;
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info);
+
+    outputHandle->Allocate();
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+} // anonymous namespace
+
+LayerTestResult<float, 4> ConstantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return ConstantTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<int16_t, 4> ConstantInt16SimpleQuantizationScaleNoOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return ConstantTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<uint8_t, 4> ConstantUint8SimpleQuantizationScaleNoOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return ConstantTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<uint8_t, 4> ConstantUint8CustomQuantizationScaleAndOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return ConstantTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 2e-6f, 1);
+}
+
+LayerTestResult<int16_t, 4> ConstantInt16CustomQuantizationScaleAndOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return ConstantTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 2e-6f, 1);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/ConstantTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ConstantTestImpl.hpp
new file mode 100644 (file)
index 0000000..fa3e228
--- /dev/null
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> ConstantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> ConstantUint8SimpleQuantizationScaleNoOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> ConstantInt16SimpleQuantizationScaleNoOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> ConstantUint8CustomQuantizationScaleAndOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> ConstantInt16CustomQuantizationScaleAndOffsetTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
new file mode 100644 (file)
index 0000000..01c1b18
--- /dev/null
@@ -0,0 +1,3145 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Conv2dTestImpl.hpp"
+
+#include <DataLayoutIndexed.hpp>
+#include <Permute.hpp>
+#include <TensorUtils.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <string>
+
+//
+// Static data
+//
+
+// 2-channel bias used by a number of Conv2d tests.
+static std::vector<float> Bias2({0, 2});
+
+static std::vector<float> Bias4({1, 2, 3, 4});
+
+static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
+
+// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
+static std::vector<float> ConvInput3x8x16({
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+});
+
+//
+// Helper templates
+//
+
+// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
+{
+    if(biasEnabled)
+    {
+        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
+        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias2));
+        return bias;
+    }
+    else
+    {
+        return boost::multi_array<T, 1>();
+    }
+}
+
+// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
+{
+    if(biasEnabled)
+    {
+        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
+        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias4));
+        return bias;
+    }
+    else
+    {
+        return boost::multi_array<T, 1>();
+    }
+}
+
+// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
+{
+    if(biasEnabled)
+    {
+        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
+        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias8));
+        return bias;
+    }
+    else
+    {
+        return boost::multi_array<T, 1>();
+    }
+}
+
+// Helper template that returns Bias2, Bias4 or Bias8 (or an empty vector when bias is
+// disabled) depending on the number of output channels; other counts fall back to Bias2.
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
+{
+    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
+    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
+
+    switch (outputChannels)
+    {
+        case 2:
+        default:
+        {
+            return GetBias2<ArmnnType>(biasEnabled, qScale);
+        }
+        case 4:
+        {
+            return GetBias4<ArmnnType>(biasEnabled, qScale);
+        }
+        case 8:
+        {
+            return GetBias8<ArmnnType>(biasEnabled, qScale);
+        }
+    }
+}
+
+//
+// Implementation templates
+//
+
+// Mapping from input type to bias type for fully connected layers.
+// float => float, uint8_t => int32_t
+template<typename T>
+struct FullyConnectedBiasTypeForInputType;
+
+template<>
+struct FullyConnectedBiasTypeForInputType<float>
+{
+    using Type = float;
+};
+
+template<>
+struct FullyConnectedBiasTypeForInputType<uint8_t>
+{
+    using Type = int32_t;
+};
+
+// Modifies a std::vector in-place using a specified bias.
+template<typename T, typename B>
+void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
+    const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
+{
+    BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
+                     "Invalid type and parameter combination.");
+    BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
+                     "Invalid type and parameter combination.");
+
+    // Note we need to dequantize and re-quantize the image value and the bias.
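+    // The data is assumed to be in (N)CHW order: the element for channel i, row y,
+    // column x of a w x h image lives at index (i * h + y) * w + x.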
+    for (uint32_t i = 0; i < bias.size(); ++i)
+    {
+        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
+        for (uint32_t y = 0; y < h; ++y)
+        {
+            for (uint32_t x = 0; x < w; ++x)
+            {
+                uint32_t offset = (i * h + y) * w + x;
+                BOOST_ASSERT(offset < v.size());
+                T& outRef = v[offset];
+                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
+                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
+            }
+        }
+    }
+}
+
+//
+// Convolution2d implementations
+//
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
+LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<T, 4>& originalInput,
+    const boost::multi_array<T, 4>& originalKernel,
+    const boost::multi_array<B, 1>& bias,
+    const boost::multi_array<T, 4>& originalOutputExpected,
+    float qScale,
+    int32_t qOffset,
+    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
+    uint32_t padLeft = 0,
+    uint32_t padTop = 0,
+    uint32_t padRight = 0,
+    uint32_t padBottom = 0,
+    uint32_t strideX = 1,
+    uint32_t strideY = 1,
+    uint32_t dilationX = 1,
+    uint32_t dilationY = 1)
+{
+    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
+    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
+    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
+    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
+
+    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
+    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
+    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
+    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
+
+    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
+    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
+    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
+    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
+
+    bool biasEnabled = bias.size() > 0;
+
+    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
+    BOOST_ASSERT(inputNum == 1);
+    BOOST_ASSERT(outputNum == 1);
+
+    // If a bias is used, its size must equal the number of output channels.
+    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
+
+    // Note these tensors will use two (identical) batches.
+    armnn::TensorInfo inputTensorInfo =
+            armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo =
+            armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
+    armnn::TensorInfo kernelDesc =
+            armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
+    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        kernelDesc.SetQuantizationScale(qScale);
+        kernelDesc.SetQuantizationOffset(qOffset);
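+        // Convention: the bias scale is the product of the input and weight scales
+        // (both qScale here), so the bias values line up with the accumulator of the
+        // quantized convolution.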
+        biasDesc.SetQuantizationScale(qScale*qScale);
+        biasDesc.SetQuantizationOffset(0);
+    }
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+
+    // Construct input data - two batches of the same input image.
+    std::vector<T> inputImage;
+    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
+    std::vector<T> inputData;
+    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
+    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
+
+    // At this point, permute the input data if the NHWC layout is required.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
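+    // PermutationVector maps source dimension i to destination dimension mappings[i]:
+    // with { 0, 3, 1, 2 }, N stays at 0, C moves to 3, H to 1 and W to 2 (NCHW -> NHWC).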
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
+        inputData = tmp;
+    }
+
+    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    std::vector<T> outputImage;
+    outputImage.assign(originalOutputExpected.data(),
+            originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
+
+    // Apply bias to output image if it is enabled.
+    if(biasEnabled)
+    {
+        std::vector<T> biasV;
+        biasV.assign(bias.data(), bias.data() + outputChannels);
+        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+            outputWidth, outputHeight);
+    }
+
+    // Construct expected output data - two identical images.
+    std::vector<T> outputData;
+    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
+    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
+
+    // At this point, permute the expected output if the NHWC layout is required.
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
+        outputData = tmp;
+    }
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::Convolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+    // Permute the kernel if necessary
+    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
+    }
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+
+    if(biasEnabled)
+    {
+        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+    }
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
+    data.m_Parameters.m_StrideX = strideX;
+    data.m_Parameters.m_StrideY = strideY;
+    data.m_Parameters.m_PadLeft = padLeft;
+    data.m_Parameters.m_PadRight = padRight;
+    data.m_Parameters.m_PadTop = padTop;
+    data.m_Parameters.m_PadBottom = padBottom;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout;
+    data.m_Parameters.m_DilationX = dilationX;
+    data.m_Parameters.m_DilationY = dilationY;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
+LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<T, 4>& input,
+    const boost::multi_array<T, 4>& kernel,
+    const boost::multi_array<B, 1>& bias,
+    const boost::multi_array<T, 4>& outputExpected,
+    const armnn::DataLayout dataLayout,
+    float qScale,
+    int32_t qOffset,
+    uint32_t padLeft = 1,
+    uint32_t padTop = 1,
+    uint32_t padRight = 1,
+    uint32_t padBottom = 1,
+    uint32_t strideX  = 1,
+    uint32_t strideY  = 1)
+{
+    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
+    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[3]);
+    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[1]);
+    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[2]);
+
+    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
+    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+
+    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
+    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
+    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
+    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
+
+    bool biasEnabled = bias.size() > 0;
+
+    // Creates the tensors.
+    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
+                                       ArmnnType);
+    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
+    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
+
+    // Construct the input data.
+    std::vector<T> inputData;
+    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
+    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    // Construct the output data, with bias applied, as appropriate.
+    std::vector<T> outputData;
+    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+    armnn::Convolution2dQueueDescriptor data;
+
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
+    data.m_Parameters.m_StrideX = strideX;
+    data.m_Parameters.m_StrideY = strideY;
+    data.m_Parameters.m_PadLeft = padLeft;
+    data.m_Parameters.m_PadRight = padRight;
+    data.m_Parameters.m_PadTop = padTop;
+    data.m_Parameters.m_PadBottom = padBottom;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = dataLayout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T,4> Convolution1dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled)
+{
+    using B = armnn::ResolveType<ArmnnBType>;
+    // Until we have a specialist 1D convolution layer, we can fake one using
+    // 2D convolution with the final dimension set to 1.
+    // I don't anticipate this being particularly slow, given that convolution is implemented
+    // as a matrix multiplication, at which point dimension doesn't matter.
+
+    unsigned int batchSize      = 1;
+    unsigned int inputChannels  = 2;
+    unsigned int outputChannels = 3;
+    unsigned int inputSize      = 5; // The 1D size (could view as 'width' or 'height').
+    unsigned int kernelSize     = 3;
+    unsigned int padSize        = 2;
+    unsigned int stride         = 1;
+    unsigned int outputSize     = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
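+    // With the values above: (5 + 2 * 2 - 3 + 1) / 1 = 7.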
+
+    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
+    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
+    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
+    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputInfo.SetQuantizationScale(qScale);
+        inputInfo.SetQuantizationOffset(qOffset);
+        outputInfo.SetQuantizationScale(qScale);
+        outputInfo.SetQuantizationOffset(qOffset);
+        kernelInfo.SetQuantizationScale(qScale);
+        kernelInfo.SetQuantizationOffset(qOffset);
+        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
+        biasInfo.SetQuantizationOffset(0);
+    }
+
+    std::vector<T> inputData(
+        QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), {
+            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
+            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
+        }));
+
+    std::vector<T> kernelData(
+        QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), {
+            1.0f, 0.0f, 0.0f,
+            0.0f, 2.0f, -1.5f,
+
+            0.0f, 0.0f, 0.0f,
+            0.2f, 0.2f, 0.2f,
+
+            0.5f, 0.0f, 0.5f,
+            0.0f, -1.0f, 0.0f
+        }));
+
+    std::vector<B> biasData(
+        QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), {
+            1.0f, 0.0f, 0.0f
+        }));
+
+    std::vector<T> outputData(
+        QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), {
+            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
+            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
+            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
+        }));
+
+    // Optionally apply bias to output image.
+    if(biasEnabled)
+    {
+        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
+            biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
+            1, outputSize);
+    }
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
+
+    armnn::Convolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle         weightsTensor(kernelInfo);
+    armnn::ScopedCpuTensorHandle         biasTensor(biasInfo);
+
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
+    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
+
+    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
+
+    data.m_Weight         = &weightsTensor;
+    data.m_Bias           = &biasTensor;
+    data.m_Parameters.m_StrideX        = 1;
+    data.m_Parameters.m_StrideY        = stride;
+    data.m_Parameters.m_PadLeft        = 0;
+    data.m_Parameters.m_PadRight       = 0;
+    data.m_Parameters.m_PadTop         = padSize;
+    data.m_Parameters.m_PadBottom      = padSize;
+    data.m_Parameters.m_BiasEnabled    = biasEnabled;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    // Copy the output back and package it with the expected values.
+    LayerTestResult<T,4> ret(outputInfo);
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
+    return ret;
+}
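+
+// Illustrative only (not part of the original test suite): the sizes chosen above follow the
+// standard convolution output formula. A minimal constexpr sketch, with a helper name of our
+// own, double-checks the arithmetic at compile time.
+namespace
+{
+constexpr unsigned int Conv1dOutputExtent(unsigned int inputSize,
+                                          unsigned int kernelSize,
+                                          unsigned int padSize,
+                                          unsigned int stride)
+{
+    // padSize is applied at both ends of the single spatial dimension.
+    return (inputSize + 2 * padSize - kernelSize) / stride + 1;
+}
+
+// inputSize = 5, kernelSize = 3, padSize = 2, stride = 1 => outputSize = 7, as used above.
+static_assert(Conv1dOutputExtent(5, 3, 2, 1) == 7, "Unexpected 1D convolution output size");
+} // anonymous namespace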
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled,
+    armnn::DataLayout dataLayout)
+{
+    // Use a single-batch 1-channel 3x4 image.
+
+    armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
+                                                      {
+                                                       1, 5, 2, 3,
+                                                       8, 7, 3, 6,
+                                                       3, 3, 9, 1
+                                                       });
+
+    // Use a single 1-channel 3x3 kernel.
+    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
+    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
+                                                                    4, 5, 6,
+                                                                    0, 0, 0,
+                                                                    3, 2, 1
+                                                                    });
+
+    // Expected output is a single-batch 1-channel 3x4 image.
+    armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
+
+    const std::vector<float> outputData =
+            {
+                    23, 41, 33, 21,
+                    44, 65, 76, 52,
+                    82, 85, 79, 42
+            };
+
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
+
+    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        boost::multi_array<T, 1>(),
+        expectedOutput,
+        dataLayout,
+        qScale,
+        qOffset);
+}
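+
+// Illustrative only: the expected values above can be reproduced by hand. The matching 3x4
+// input/output extents imply a symmetric padding of 1, so the top-left output correlates the
+// kernel against the zero-padded corner; only the bottom-right 2x2 of the kernel overlaps
+// real data there:
+//     out(0, 0) = 0*1 + 0*5 + 2*8 + 1*7 = 23.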
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float qScale,
+        int32_t qOffset,
+        bool biasEnabled,
+        const armnn::DataLayout& dataLayout)
+{
+    // Input is a single-batch, 1 channel, 5x5 image.
+    armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
+            {
+                1, 5, 2, 3, 5,
+                8, 7, 3, 6, 3,
+                3, 3, 9, 1, 9,
+                4, 1, 8, 1, 3,
+                6, 8, 1, 9, 2
+            });
+
+    // Use a 3x3 kernel.
+    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
+    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
+            {
+                4, 5, 6,
+                0, 0, 0,
+                3, 2, 1
+            });
+
+    // Expected output is a single-batch, 1 channel, 3x3 image.
+    armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
+
+    const std::vector<T> outputData =
+            {
+                23, 33, 24,
+                91, 99, 48,
+                26, 50, 19
+            };
+
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
+
+    uint32_t padLeft = 1;
+    uint32_t padTop = 1;
+    uint32_t padRight = 1;
+    uint32_t padBottom = 1;
+    uint32_t strideX  = 2;
+    uint32_t strideY  = 2;
+
+    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        boost::multi_array<T, 1>(),
+        expectedOutput,
+        dataLayout,
+        qScale,
+        qOffset,
+        padLeft,
+        padTop,
+        padRight,
+        padBottom,
+        strideX,
+        strideY);
+}
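+
+// Illustrative only (helper name is ours, not an Arm NN API): the 3x3 expected output above
+// follows from the general output-size formula with per-side padding.
+namespace
+{
+constexpr unsigned int StridedConvOutputExtent(unsigned int inputSize,
+                                               unsigned int kernelSize,
+                                               unsigned int padBefore,
+                                               unsigned int padAfter,
+                                               unsigned int stride)
+{
+    return (inputSize + padBefore + padAfter - kernelSize) / stride + 1;
+}
+
+// 5x5 input, 3x3 kernel, padding 1 per side, stride 2 => 3x3 output.
+static_assert(StridedConvOutputExtent(5, 3, 1, 1, 2) == 3, "Expected a 3x3 output above");
+} // anonymous namespace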
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    // Use common single-batch 3-channel 16x8 image.
+    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
+
+    // Use a 2-element batch with 3-channel 3x5 kernels.
+    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
+    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            1, 1, 1,
+            1, -1, 1,
+            1, 1, 1,
+            1, 1, 1,
+            1, 1, 1,
+
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+
+            2, 2, 2,
+            2, 2, 2,
+            2, 2, 2,
+            2, 2, 2,
+            2, 2, 2,
+
+
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+
+            1, 1, 1,
+            1, 1, 1,
+            1, 1, 1,
+            1, 1, 1,
+            1, 1, 1,
+
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0
+        })));
+
+    // Expected output is 1 batch of a 2-channel 14x4 image.
+    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
+            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
+            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
+            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
+
+            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        })));
+
+    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout);
+}
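+
+// Illustrative only, reusing the StridedConvOutputExtent sketch above: a 3-wide, 5-high
+// kernel over the 16x8 image with no padding and unit stride gives the 14x4 extent used
+// for the expected output.
+static_assert(StridedConvOutputExtent(16, 3, 0, 0, 1) == 14, "Expected output width 14");
+static_assert(StridedConvOutputExtent(8, 5, 0, 0, 1) == 4, "Expected output height 4");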
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
+
+    // Use common single-batch 3-channel 16x8 image.
+    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
+
+    // Use a 2-element batch of 3-channel 3x3 kernels.
+    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
+    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            1, 1, 1,
+            1, -1, 1,
+            1, 1, 1,
+
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+
+            2, 2, 2,
+            2, 2, 2,
+            2, 2, 2,
+
+
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0,
+
+            1, 1, 1,
+            1, 1, 1,
+            1, 1, 1,
+
+            0, 0, 0,
+            0, 0, 0,
+            0, 0, 0
+        })));
+
+    // Expected output is 1 batch of a 2-channel 14x6 image.
+    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
+
+            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        })));
+
+    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout,
+    float qScale,
+    int32_t qOffset)
+{
+    // Use a single-batch 1-channel 3x3 image as input.
+    armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            11,21,31,
+            12,22,32,
+            13,23,33
+        })));
+
+    // Use 1 batch of a 1-channel 2x2 kernel.
+    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
+    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            -11,-21,
+            -12,-22,
+        })));
+
+    // Expected output is 1 batch of a 1-channel 6x8 image.
+    // Manually calculated like this:
+    //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
+    //[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
+    //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
+    //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
+    //[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
+    //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
+    //[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
+    armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+               0,    0,      0,    0,    0,    0,
+            -242,  -594,  -934, -372,    0,    0,
+            -495, -1190, -1850, -725,    0,    0,
+            -538, -1256, -1916, -748,    0,    0,
+            -273, -626,  -946,  -363,    0,    0,
+               0,    0,     0,     0,    0,    0,
+               0,    0,     0,     0,    0,    0,
+               0,    0,     0,     0,    0,    0
+        })));
+
+    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(false, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout,
+        1,  // Padding left.
+        2,  // Padding top.
+        3,  // Padding right.
+        4); // Padding bottom.
+}
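+
+// Illustrative only: asymmetric padding feeds the same formula with separate before/after
+// terms, giving the 8-high, 6-wide output extents above.
+static_assert(StridedConvOutputExtent(3, 2, 2, 4, 1) == 8, "Expected output height 8");
+static_assert(StridedConvOutputExtent(3, 2, 1, 3, 1) == 6, "Expected output width 6");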
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout,
+    float qScale,
+    int32_t qOffset)
+{
+    // Use a single-batch 1-channel 5x5 image as input.
+    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            11,21,31,41,51,
+            12,22,32,42,52,
+            13,23,33,43,53,
+            14,24,34,44,54,
+            15,25,35,45,55,
+        })));
+
+    // Use 1 batch of a 1-channel 4x4 kernel.
+    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
+    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            -11,-21,-31,-41,
+            -12,-22,-32,-42,
+            -13,-23,-33,-43,
+            -14,-24,-34,-44,
+        })));
+
+    // Expected output is 1 batch of a 1-channel 5x5 image.
+    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, {
+            -7140, -10580, -13940,  -9300, -5230,
+            -9590, -14120, -18520, -12290, -6860,
+            -9980, -14560, -18960, -12560, -7000,
+            -7518, -10904, -14144,  -9318, -5152,
+            -5032,  -7256,  -9376,  -6142, -3368,
+        })));
+
+    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(false, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout,
+        1,  // Padding left.
+        1,  // Padding top.
+        2,  // Padding right.
+        2); // Padding bottom.
+}
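+
+// Illustrative only: the 4x4 kernel with padding of 1 (top/left) and 2 (bottom/right)
+// preserves the 5x5 extent: (5 + 1 + 2 - 4) / 1 + 1 = 5 in both dimensions.
+static_assert(StridedConvOutputExtent(5, 4, 1, 2, 1) == 5, "Expected a 5x5 output above");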
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const std::vector<float>& inputNoQuantizedValues,
+    armnn::TensorInfo& inputTensorInfo,
+    const std::vector<float>& kernelNoQuantizedValues,
+    armnn::TensorInfo& kernelTensorInfo,
+    const std::vector<float>& outputExpectedNoQuantizedValues,
+    armnn::TensorInfo& outputTensorInfo,
+    uint32_t dilationX,
+    uint32_t dilationY,
+    armnn::DataLayout layout = armnn::DataLayout::NCHW,
+    uint32_t padLeft = 0,
+    uint32_t padTop = 0,
+    uint32_t padRight = 0,
+    uint32_t padBottom = 0,
+    uint32_t strideX  = 1,
+    uint32_t strideY  = 1,
+    bool biasEnabled = false
+)
+{
+    float qScale;
+    int32_t qOffset;
+    switch (ArmnnType)
+    {
+        case armnn::DataType::QuantisedAsymm8:
+        {
+            qScale = 0.1f;
+            qOffset = 128;
+            break;
+        }
+        case armnn::DataType::QuantisedSymm16:
+        {
+            qScale = 0.1f;
+            qOffset = 0;
+            break;
+        }
+        case armnn::DataType::Float32:
+        default:
+        {
+            qScale = 0.f;
+            qOffset = 0;
+            break;
+        }
+    }
+
+    inputTensorInfo.SetQuantizationScale(qScale);
+    inputTensorInfo.SetQuantizationOffset(qOffset);
+    kernelTensorInfo.SetQuantizationScale(qScale);
+    kernelTensorInfo.SetQuantizationOffset(qOffset);
+    outputTensorInfo.SetQuantizationScale(qScale);
+    outputTensorInfo.SetQuantizationOffset(qOffset);
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo,
+                                  std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                    inputTensorInfo.GetQuantizationOffset(),
+                                                                    inputNoQuantizedValues)));
+    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
+                                  std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
+                                                                    kernelTensorInfo.GetQuantizationOffset(),
+                                                                    kernelNoQuantizedValues)));
+    auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
+                                           std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                             outputTensorInfo.GetQuantizationOffset(),
+                                                                             outputExpectedNoQuantizedValues)));
+
+    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            input,
+            kernel,
+            GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
+            expectedOutput,
+            qScale,
+            qOffset,
+            layout,
+            padLeft,
+            padTop,
+            padRight,
+            padBottom,
+            strideX,
+            strideY,
+            dilationX,
+            dilationY);
+}
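+
+// Illustrative only (helper name is ours): dilation enlarges the effective kernel extent to
+// d * (K - 1) + 1 before the usual output-size formula applies.
+namespace
+{
+constexpr unsigned int DilatedKernelExtent(unsigned int kernelSize, unsigned int dilation)
+{
+    return dilation * (kernelSize - 1) + 1;
+}
+
+// A 3x3 kernel at dilation 3 acts like a 7x7 kernel, so a 10x10 input with no padding and
+// unit stride yields the 4x4 outputs used by the dilation tests below.
+static_assert(DilatedKernelExtent(3, 3) == 7, "Expected an effective 7x7 kernel");
+static_assert(StridedConvOutputExtent(10, DilatedKernelExtent(3, 3), 0, 0, 1) == 4,
+              "Expected a 4x4 output");
+} // anonymous namespace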
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
+    std::vector<float> kernelNoQuantizedValues =
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9
+    };
+
+    // Since the dilation rate is 3, the kernel is effectively dilated to 7x7 (3 * (3 - 1) + 1 = 7),
+    // so the output is 4x4: (I - K + 2P) / S + 1 => (10 - 7 + 0) / 1 + 1.
+    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
+    std::vector<float> outputExpectedNoQuantizedValues =
+    {
+        6., 5., 5., 5.,
+        6., 5., 5., 5.,
+        6., 5., 5., 5.,
+        3., 2., 2., 2.
+    };
+
+    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            3,
+            3,
+            layout,
+            0,  // Padding left.
+            0,  // Padding top.
+            0,  // Padding right.
+            0,  // Padding bottom.
+            1,  // Stride X.
+            1,  // Stride Y.
+            biasEnabled);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
+    std::vector<float> kernelNoQuantizedValues =
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9,
+
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9
+    };
+
+    // Since the dilation rate is 3, the kernel is effectively dilated to 7x7 (3 * (3 - 1) + 1 = 7),
+    // so the output is 4x4: (I - K + 2P) / S + 1 => (10 - 7 + 0) / 1 + 1.
+    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
+    std::vector<float> outputExpectedNoQuantizedValues =
+    {
+        12., 10., 10., 10.,
+        12., 10., 10., 10.,
+        12., 10., 10., 10.,
+         6.,  4.,  4.,  4.
+    };
+
+    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            3,
+            3,
+            layout,
+            0,  // Padding left.
+            0,  // Padding top.
+            0,  // Padding right.
+            0,  // Padding bottom.
+            1,  // Stride X.
+            1,  // Stride Y.
+            biasEnabled);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
+        armnn::IWorkloadFactory &workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+    {
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+    };
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
+    std::vector<float> kernelNoQuantizedValues =
+    {
+        1, 2,
+        3, 4
+    };
+
+    // Since the dilation rate is 2, the kernel is effectively dilated to 3x3: d * (K - 1) + 1 = 2 * (2 - 1) + 1 = 3.
+    // The output is therefore 4x4: trunc((I - K' + padBefore + padAfter) / S) + 1 => trunc((10 - 3 + 1 + 1) / 3) + 1,
+    // where dilation d = 2, kernel size K = 2 (dilated K' = 3), input size I = 10, padding = 1 per side, stride S = 3.
+    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
+    std::vector<float> outputExpectedNoQuantizedValues =
+    {
+        4,  7,  7, 3,
+        6, 10, 10, 4,
+        6, 10, 10, 4,
+        2,  3,  3, 1
+    };
+    uint32_t padLeft = 1;
+    uint32_t padTop = 1;
+    uint32_t padRight = 1;
+    uint32_t padBottom = 1;
+
+    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            2,
+            2,
+            layout,
+            padLeft,
+            padTop,
+            padRight,
+            padBottom,
+            3,
+            3,
+            biasEnabled
+            );
+}
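+
+// Illustrative only: the two sketches above also cover the dilated, padded, strided case.
+// A 2x2 kernel at dilation 2 acts like a 3x3 kernel; with padding 1 per side and stride 3,
+// trunc((10 - 3 + 1 + 1) / 3) + 1 = 4, matching the 4x4 expected output.
+static_assert(StridedConvOutputExtent(10, DilatedKernelExtent(2, 2), 1, 1, 3) == 4,
+              "Expected a 4x4 output");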
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T,4> CompareConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory)
+{
+    unsigned int inputHeight   = 8;
+    unsigned int inputWidth    = 16;
+    unsigned int inputChannels = 3;
+    unsigned int inputNum      = 5;
+
+    unsigned int kernelHeight = 3;
+    unsigned int kernelWidth  = 3;
+
+    unsigned int strideX = 2;
+    unsigned int strideY = 3;
+    unsigned int padX    = 1;
+    unsigned int padY    = 1;
+
+    unsigned int outputNum      = inputNum;
+    unsigned int outputChannels = 2;
+    unsigned int outputHeight   = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
+    unsigned int outputWidth    = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+    armnn::TensorInfo kernelDesc;
+    armnn::TensorInfo biasDesc;
+
+    unsigned int inputShape[]  = {inputNum, inputChannels, inputHeight, inputWidth};
+    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
+    unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
+    unsigned int biasShape[]   = {outputChannels};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+    kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
+    biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
+
+    LayerTestResult<T,4> ret(outputTensorInfo);
+
+    auto input  = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
+    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
+    auto bias   = MakeRandomTensor<T, 1>(biasDesc, 1028);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::Convolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor;
+    data.m_Parameters.m_StrideX = strideX;
+    data.m_Parameters.m_StrideY = strideY;
+    data.m_Parameters.m_PadLeft = padX;
+    data.m_Parameters.m_PadRight = padX;
+    data.m_Parameters.m_PadTop = padY;
+    data.m_Parameters.m_PadBottom = padY;
+    data.m_Parameters.m_BiasEnabled = true;
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
+
+    armnn::Convolution2dQueueDescriptor refData = data;
+    armnn::WorkloadInfo               refInfo = info;
+    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
+    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
+
+    std::unique_ptr<armnn::IWorkload> workload  = workloadFactory.CreateConvolution2d(data, info);
+    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
+
+    outputHandleRef->Allocate();
+    inputHandleRef->Allocate();
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    workloadRef->PostAllocationConfigure();
+    workloadRef->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
+
+    return ret;
+}
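+
+// Illustrative only: CompareConvolution2dTestImpl sizes its output with the equivalent
+// formulation (I + 2P - K + S) / S, which matches the usual (I + 2P - K) / S + 1 under
+// integer division because (x + S) / S == x / S + 1 for non-negative x.
+static_assert((8 + 2 * 1 - 3 + 3) / 3 == StridedConvOutputExtent(8, 3, 1, 1, 3),
+              "Height formulations agree");
+static_assert((16 + 2 * 1 - 3 + 2) / 2 == StridedConvOutputExtent(16, 3, 1, 1, 2),
+              "Width formulations agree");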
+
+//
+// DepthwiseConvolution2d implementations
+//
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<T, 4>& input,
+    const boost::multi_array<T, 4>& kernel,
+    const boost::multi_array<B, 1>& bias,
+    const boost::multi_array<T, 4>& outputExpected,
+    float qScale,
+    int32_t qOffset,
+    const armnn::DataLayout layout,
+    uint32_t padLeft = 0,
+    uint32_t padTop = 0,
+    uint32_t padRight = 0,
+    uint32_t padBottom = 0,
+    uint32_t strideX = 1,
+    uint32_t strideY = 1)
+{
+    unsigned int inputNum       = boost::numeric_cast<unsigned int>(input.shape()[0]);
+    unsigned int inputChannels  = boost::numeric_cast<unsigned int>(input.shape()[1]);
+    unsigned int inputHeight    = boost::numeric_cast<unsigned int>(input.shape()[2]);
+    unsigned int inputWidth     = boost::numeric_cast<unsigned int>(input.shape()[3]);
+    unsigned int kernelChanMul  = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
+    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
+    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
+    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
+    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
+
+    // If a bias is used, its size must equal the number of output channels.
+    bool biasEnabled = bias.size() > 0;
+    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
+
+    // Creates the tensors.
+    armnn::TensorInfo inputTensorInfo =
+            armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo =
+            armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
+    armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
+    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        kernelDesc.SetQuantizationScale(qScale);
+        kernelDesc.SetQuantizationOffset(qOffset);
+        biasDesc.SetQuantizationScale(qScale*qScale);
+        biasDesc.SetQuantizationOffset(0);
+    }
+
+    // Construct the input data.
+    std::vector<T> inputData;
+    inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
+
+    // At this point, permute the input data if the target layout requires it.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
+        inputData = tmp;
+    }
+
+    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    // Construct the output data, with bias applied, as appropriate.
+    std::vector<T> outputData;
+    outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
+    if (biasEnabled)
+    {
+        std::vector<T> biasV;
+        biasV.assign(bias.data(), bias.data() + outputChannels);
+        ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+            outputWidth, outputHeight);
+    }
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+
+    // At this point, permute the expected output if the target layout requires it.
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
+        outputData = tmp;
+    }
+
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+    if (biasEnabled)
+    {
+        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+    }
+
+    armnn::DepthwiseConvolution2dQueueDescriptor data;
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
+    data.m_Parameters.m_StrideX = strideX;
+    data.m_Parameters.m_StrideY = strideY;
+    data.m_Parameters.m_PadLeft = padLeft;
+    data.m_Parameters.m_PadRight = padRight;
+    data.m_Parameters.m_PadTop = padTop;
+    data.m_Parameters.m_PadBottom = padBottom;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
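+
+// Illustrative only (helper and names are ours, not an Arm NN API): an armnn
+// PermutationVector entry gives, for each source axis, the destination axis it moves to,
+// so { 0, 3, 1, 2 } sends N->0, C->3, H->1, W->2, i.e. it rewrites NCHW data as NHWC.
+namespace
+{
+constexpr unsigned int SourceAxisFor(const unsigned int (&mappings)[4],
+                                     unsigned int dstAxis,
+                                     unsigned int srcAxis = 0)
+{
+    // Recursively find which source axis lands on dstAxis.
+    return mappings[srcAxis] == dstAxis ? srcAxis : SourceAxisFor(mappings, dstAxis, srcAxis + 1);
+}
+
+constexpr unsigned int NchwToNhwcMappings[4] = { 0, 3, 1, 2 };
+static_assert(SourceAxisFor(NchwToNhwcMappings, 3) == 1, "C becomes the last NHWC axis");
+static_assert(SourceAxisFor(NchwToNhwcMappings, 1) == 2, "H becomes the second NHWC axis");
+} // anonymous namespace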
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    using B = armnn::ResolveType<ArmnnBType>;
+
+    unsigned int inputHeight = 3;
+    unsigned int inputWidth = 3;
+    unsigned int inputChannels = 2;
+    unsigned int inputNum = 1;
+
+    unsigned int kernelHeight = 3;
+    unsigned int kernelWidth = 3;
+    unsigned int kernelChannels = inputChannels;
+    unsigned int kernelDepthMultiplier = 1;
+
+    unsigned int outputHeight = 1;
+    unsigned int outputWidth = 1;
+    unsigned int outputChannels = kernelChannels;
+    unsigned int outputNum = inputNum;
+
+    armnn::TensorInfo inputTensorInfo =
+            armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo =
+            armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
+    armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
+                                 ArmnnType);
+    armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        kernelDesc.SetQuantizationScale(qScale);
+        kernelDesc.SetQuantizationOffset(qOffset);
+        biasDesc.SetQuantizationScale(qScale*qScale);
+        biasDesc.SetQuantizationOffset(0);
+    }
+    std::vector<T> inputData = std::vector<T>(
+            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
+                    1.f, 2.f, 1.f,
+                    2.f, 1.f, 2.f,
+                    1.f, 2.f, 1.f,
+
+                    1.f, 2.f, 1.f,
+                    2.f, 1.f, 2.f,
+                    1.f, 2.f, 1.f,
+            }));
+    // At this point, permute the input data if the target layout requires it.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
+        inputData = tmp;
+    }
+    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+                                            {0, 2}));
+    auto bias = MakeTensor<B, 1>(biasDesc, biasV);
+
+    std::vector<T> kernelData = std::vector<T>(
+            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
+                    1.f, 0.f,  1.f,
+                    0.f, 0.f,  0.f,
+                    -1.f, 0.f, -1.f,
+
+                    1.f, 0.f,  1.f,
+                    0.f, 0.f,  0.f,
+                    -1.f, 0.f, -1.f,
+            }));
+    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
+
+    // Manually calculated.
+    std::vector<T> outputImage(
+        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                           outputTensorInfo.GetQuantizationOffset(),
+                           {0.f, 0.f})
+    );
+
+    // Optionally apply bias to output image.
+    if(biasEnabled)
+    {
+        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+                  outputWidth, outputHeight);
+    }
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(outputImage.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
+        outputImage = tmp;
+    }
+
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::DepthwiseConvolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
+    data.m_Parameters.m_StrideX = 1;
+    data.m_Parameters.m_StrideY = 1;
+    data.m_Parameters.m_PadLeft = 0;
+    data.m_Parameters.m_PadRight = 0;
+    data.m_Parameters.m_PadTop = 0;
+    data.m_Parameters.m_PadBottom = 0;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
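+
+// Illustrative only: the "manually calculated" {0, 0} expected output above can be checked
+// by hand. Each channel correlates its 3x3 input against the kernel [1 0 1; 0 0 0; -1 0 -1]:
+//     1*1 + 0*2 + 1*1 + 0*2 + 0*1 + 0*2 + (-1)*1 + 0*2 + (-1)*1 = 0,
+// so both output channels are zero before any bias is applied.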
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    using B = armnn::ResolveType<ArmnnBType>;
+
+    unsigned int depthMultiplier = 2;
+
+    unsigned int inputHeight    = 8;
+    unsigned int inputWidth     = 16;
+    unsigned int inputChannels  = 2;
+    unsigned int inputBatchSize = 1;
+
+    unsigned int kernelHeight = 5;
+    unsigned int kernelWidth  = 3;
+
+    unsigned int outputHeight    = inputHeight - kernelHeight + 1 + 2;
+    unsigned int outputWidth     = (inputWidth - kernelWidth + 1)/2;
+    unsigned int outputChannels  = inputChannels * depthMultiplier;
+    unsigned int outputBatchSize = inputBatchSize;
+
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
+            inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
+            outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
+    armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
+                                 ArmnnType);
+    armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        kernelDesc.SetQuantizationScale(qScale);
+        kernelDesc.SetQuantizationOffset(qOffset);
+        biasDesc.SetQuantizationScale(qScale*qScale);
+        biasDesc.SetQuantizationOffset(0);
+    }
+
+    // NOTE: originalInputData is in NCHW format
+    std::vector<T> originalInputData = std::vector<T>(
+            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            }));
+    std::vector<T> inputData = originalInputData;
+    // At this point, permute the input data if the target layout requires it.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            originalInputData.data(), inputData.data(), sizeof(T));
+    }
+    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+        {0, 2, 1, -1}));
+    auto bias = MakeTensor<B, 1>(biasDesc, biasV);
+
+    std::vector<T> kernelData = std::vector<T>(
+            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
+                    1, 1, 1,
+                    1, -1, 1,
+                    1, 1, 1,
+                    1, 1, 1,
+                    1, 1, 1,
+
+                    2, 2, 2,
+                    2, 2, 2,
+                    2, 2, 2,
+                    2, 2, 2,
+                    2, 2, 2,
+
+                    0, 0, 0,
+                    0, -1, 0,
+                    0, 0, 0,
+                    0, 0, 0,
+                    0, 0, 0,
+
+                    0, 0, 0,
+                    0, 0, 0,
+                    0, 1, 0,
+                    0, 0, 0,
+                    0, 0, 0
+            }));
+    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
+
+    // Manually calculated.
+    std::vector<T> originalOutputImage = std::vector<T>(
+        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
+            3.5f,  3.5f,  3.5f,  3.5f,  3.5f,  3.5f,  3.5f,
+            6.0f,  6.0f,  6.0f,  6.0f,  6.0f,  6.0f,  6.0f,
+            5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,
+            6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,
+            6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,  6.5f,
+            5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,  5.0f,
+
+            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
+            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
+            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
+            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
+
+            8.0f,  8.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            10.0f, 10.0f, 0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            8.0f,  8.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
+            0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f
+        }));
+
+    // Optionally apply bias to output image.
+    if(biasEnabled)
+    {
+        ApplyBias(originalOutputImage,
+                  outputTensorInfo.GetQuantizationScale(),
+                  outputTensorInfo.GetQuantizationOffset(),
+                  biasV,
+                  biasDesc.GetQuantizationScale(),
+                  biasDesc.GetQuantizationOffset(),
+                  outputWidth,
+                  outputHeight);
+    }
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+    std::vector<T> outputImage = originalOutputImage;
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
+                            originalOutputImage.data(), outputImage.data(), sizeof(T));
+    }
+
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::DepthwiseConvolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
+    data.m_Parameters.m_StrideX = 2;
+    data.m_Parameters.m_StrideY = 1;
+    data.m_Parameters.m_PadLeft = 0;
+    data.m_Parameters.m_PadRight = 0;
+    data.m_Parameters.m_PadTop = 1;
+    data.m_Parameters.m_PadBottom = 1;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
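+
+// Illustrative only: depthwise convolution emits inputChannels * depthMultiplier (2 * 2 = 4)
+// output channels, and the spatial extents above follow the StridedConvOutputExtent sketch:
+static_assert(StridedConvOutputExtent(8, 5, 1, 1, 1) == 6, "Expected output height 6");
+static_assert(StridedConvOutputExtent(16, 3, 0, 0, 2) == 7, "Expected output width 7");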
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+        typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<T, 4>& originalInput,
+    const boost::multi_array<T, 4>& originalKernel,
+    const boost::multi_array<B, 1>& bias,
+    const boost::multi_array<T, 4>& originalOutputExpected,
+    float qScale,
+    int32_t qOffset,
+    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
+    uint32_t padLeft = 0,
+    uint32_t padTop = 0,
+    uint32_t padRight = 0,
+    uint32_t padBottom = 0,
+    uint32_t strideX = 1,
+    uint32_t strideY = 1,
+    uint32_t dilationX = 1,
+    uint32_t dilationY = 1)
+{
+    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
+    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
+    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
+    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
+
+    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
+    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
+    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
+    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
+
+    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
+    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
+    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
+    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
+
+    bool biasEnabled = bias.size() > 0;
+
+    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
+    BOOST_ASSERT(inputNum == 1);
+    BOOST_ASSERT(outputNum == 1);
+
+    // If a bias is used, its size must equal the number of output channels.
+    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
+
+    // Note these tensors will use two (identical) batches.
+    armnn::TensorInfo inputTensorInfo =
+            armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo =
+            armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
+
+    // For depthwise convolution the kernel is always in NCHW layout,
+    // regardless of the data layout of the input and output.
+    armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
+
+    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        kernelDesc.SetQuantizationScale(qScale);
+        kernelDesc.SetQuantizationOffset(qOffset);
+        biasDesc.SetQuantizationScale(qScale*qScale);
+        biasDesc.SetQuantizationOffset(0);
+    }
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+
+    // Construct input data
+    std::vector<T> input;
+    input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
+    std::vector<T> inputData;
+    inputData.insert(inputData.end(), input.begin(), input.end());
+    inputData.insert(inputData.end(), input.begin(), input.end());
+
+    // At this point, permute the input data if the target layout requires it.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
+        inputData = tmp;
+    }
+
+    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    std::vector<T> output;
+    output.assign(originalOutputExpected.data(),
+                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
+
+    // Apply bias to output data if it is enabled.
+    if(biasEnabled)
+    {
+        std::vector<T> biasV;
+        biasV.assign(bias.data(), bias.data() + outputChannels);
+        ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+                  outputWidth, outputHeight);
+    }
+
+    // Construct expected output data
+    std::vector<T> outputData;
+    outputData.insert(outputData.end(), output.begin(), output.end());
+    outputData.insert(outputData.end(), output.begin(), output.end());
+
+    // At this point, permute the expected output if the requested layout requires it.
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
+        outputData = tmp;
+    }
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::DepthwiseConvolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+
+    if(biasEnabled)
+    {
+        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+    }
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled; leaving it unset can be a source of bugs.
+    data.m_Parameters.m_StrideX = strideX;
+    data.m_Parameters.m_StrideY = strideY;
+    data.m_Parameters.m_PadLeft = padLeft;
+    data.m_Parameters.m_PadRight = padRight;
+    data.m_Parameters.m_PadTop = padTop;
+    data.m_Parameters.m_PadBottom = padBottom;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout;
+    data.m_Parameters.m_DilationX = dilationX;
+    data.m_Parameters.m_DilationY = dilationY;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    // Use a single-batch 2-channel 5x5 image as input.
+    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
+    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
+        {
+             0,  1,  2,  3,  4,
+             5,  6,  7,  8,  9,
+            10, 11, 12, 13, 14,
+            15, 16, 17, 18, 19,
+            20, 21, 22, 23, 24,
+
+            25, 26, 27, 28, 29,
+            30, 31, 32, 33, 34,
+            35, 36, 37, 38, 39,
+            40, 41, 42, 43, 44,
+            45, 46, 47, 48, 49
+        })));
+
+    // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
+    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
+    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
+        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
+        {
+            32, 31, 30, 29,
+            28, 27, 26, 25,
+            24, 23, 22, 21,
+            20, 19, 18, 17,
+
+            16, 15, 14, 13,
+            12, 11, 10,  9,
+             8,  7,  6,  5,
+             4,  3,  2,  1
+        })));
+
+    // Expected output is 1 batch of a 2-channel 5x5 image.
+    // Calculated using the Python TensorFlow library with strideX=1 and strideY=1.
+    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+        {
+            1062, 1580, 1850, 1530, 1117,
+            2140, 3108, 3500, 2842, 2042,
+            3580, 5068, 5460, 4342, 3062,
+            3618, 5072, 5390, 4248, 2971,
+            3074, 4282, 4510, 3533, 2457,
+
+            1550, 2284, 2362, 1955, 1428,
+            2910, 4206, 4342, 3528, 2536,
+            3390, 4886, 5022, 4068, 2916,
+            3566, 5056, 5182, 4133, 2922,
+            3100, 4352, 4452, 3517, 2465
+        })));
+
+    return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout,
+        1,  // Padding left.
+        1,  // Padding top.
+        2,  // Padding right.
+        2,  // Padding bottom.
+        1,  // strideX
+        1); // strideY
+}
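+
+// A minimal reference sketch (illustrative only; not called by these tests) showing how
+// the hard-coded expected values above can be reproduced: a naive depthwise convolution
+// over one NCHW batch, assuming depth multiplier 1, unit strides and unit dilation.
+inline float NaiveDepthwiseAt(const std::vector<float>& in, // one [C][H][W] batch, flattened
+                              const std::vector<float>& k,  // [C][KH][KW] weights, flattened
+                              unsigned int c, unsigned int y, unsigned int x,
+                              unsigned int height, unsigned int width,
+                              unsigned int kernelH, unsigned int kernelW,
+                              int padTop, int padLeft)
+{
+    float sum = 0.0f;
+    for (unsigned int ky = 0; ky < kernelH; ++ky)
+    {
+        for (unsigned int kx = 0; kx < kernelW; ++kx)
+        {
+            // Map the output coordinate back into the (virtually padded) input.
+            const int iy = static_cast<int>(y + ky) - padTop;
+            const int ix = static_cast<int>(x + kx) - padLeft;
+            if (iy >= 0 && iy < static_cast<int>(height) && ix >= 0 && ix < static_cast<int>(width))
+            {
+                sum += in[(c * height + static_cast<unsigned int>(iy)) * width +
+                          static_cast<unsigned int>(ix)] *
+                       k[(c * kernelH + ky) * kernelW + kx];
+            }
+        }
+    }
+    // E.g. with the data above (padTop = padLeft = 1), c=0, y=0, x=0 yields 1062,
+    // the first expected output value.
+    return sum;
+}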
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled)
+{
+    auto layout = armnn::DataLayout::NHWC;
+
+    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
+    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
+        {
+             0,  1,  2,  3,  4,
+             5,  6,  7,  8,  9,
+            10, 11, 12, 13, 14,
+            15, 16, 17, 18, 19,
+            20, 21, 22, 23, 24,
+
+            25, 26, 27, 28, 29,
+            30, 31, 32, 33, 34,
+            35, 36, 37, 38, 39,
+            40, 41, 42, 43, 44,
+            45, 46, 47, 48, 49
+        })));
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
+    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
+        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
+        {
+             32, 31, 30, 29,
+             28, 27, 26, 25,
+             24, 23, 22, 21,
+             20, 19, 18, 17,
+
+             16, 15, 14, 13,
+             12, 11, 10,  9,
+              8,  7,  6,  5,
+              4,  3,  2,  1
+        })));
+
+    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+        {
+            1062, 1580, 1850, 1530, 1117,
+            2140, 3108, 3500, 2842, 2042,
+            3580, 5068, 5460, 4342, 3062,
+            3618, 5072, 5390, 4248, 2971,
+            3074, 4282, 4510, 3533, 2457,
+
+            1550, 2284, 2362, 1955, 1428,
+            2910, 4206, 4342, 3528, 2536,
+            3390, 4886, 5022, 4068, 2916,
+            3566, 5056, 5182, 4133, 2922,
+            3100, 4352, 4452, 3517, 2465
+        })));
+
+    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout,
+        1,  // Padding left.
+        1,  // Padding top.
+        2,  // Padding right.
+        2,  // Padding bottom.
+        1,  // strideX
+        1);  // strideY
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
+         typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    bool biasEnabled)
+{
+    auto layout = armnn::DataLayout::NHWC;
+
+    armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
+    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
+        {
+             0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 1, 1, 1, 0, 0, 0,
+             0, 0, 0, 1, 1, 1, 0, 0, 0,
+             0, 0, 0, 1, 1, 1, 0, 0, 0,
+             0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0, 0, 0, 0
+        })));
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
+    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
+        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
+        {
+             1, 2, 3,
+             4, 5, 6,
+             7, 8, 9
+        })));
+
+    uint32_t padLeft = 0;
+    uint32_t padTop = 0;
+    uint32_t padRight = 0;
+    uint32_t padBottom = 0;
+    uint32_t strideX  = 1;
+    uint32_t strideY  = 1;
+    uint32_t dilationX  = 3;
+    uint32_t dilationY  = 3;
+
+    // With a dilation rate of 3, the 3x3 kernel has an effective size of 7x7,
+    // so the output shrinks from 9x9 to 3x3 (all values 5).
+    armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
+    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+        {
+             5, 5, 5,
+             5, 5, 5,
+             5, 5, 5
+        })));
+
+    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+        workloadFactory,
+        memoryManager,
+        input,
+        kernel,
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
+        expectedOutput,
+        qScale,
+        qOffset,
+        layout,
+        padLeft,
+        padTop,
+        padRight,
+        padBottom,
+        strideX,
+        strideY,
+        dilationX,
+        dilationY);
+}
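+
+// An illustrative helper (standard dilated-convolution arithmetic; not called by these
+// tests) for the output-size comments in the dilation tests: the dilated kernel has an
+// effective size of K + (K - 1) * (D - 1), giving out = (I - Keff + padBefore + padAfter) / S + 1.
+inline unsigned int DilatedConvOutputSize(unsigned int inputSize, unsigned int kernelSize,
+                                          unsigned int stride, unsigned int dilation,
+                                          unsigned int padBefore, unsigned int padAfter)
+{
+    const unsigned int effectiveKernel = kernelSize + (kernelSize - 1) * (dilation - 1);
+    return (inputSize - effectiveKernel + padBefore + padAfter) / stride + 1;
+}
+// E.g. DilatedConvOutputSize(9, 3, 1, 3, 0, 0) == 3 and DilatedConvOutputSize(10, 3, 1, 3, 0, 0) == 4,
+// matching the 3x3 and 4x4 expected outputs in the tests above and below.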
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const std::vector<float>& inputNoQuantizedValues,
+        armnn::TensorInfo& inputTensorInfo,
+        const std::vector<float>& kernelNoQuantizedValues,
+        armnn::TensorInfo& kernelTensorInfo,
+        const std::vector<float>& outputExpectedNoQuantizedValues,
+        armnn::TensorInfo& outputTensorInfo,
+        uint32_t dilationX,
+        uint32_t dilationY,
+        armnn::DataLayout layout = armnn::DataLayout::NCHW,
+        bool biasEnabled = false)
+{
+    float qScale;
+    int32_t qOffset;
+    switch (ArmnnType)
+    {
+        case armnn::DataType::QuantisedAsymm8:
+        {
+            qScale = 0.1f;
+            qOffset = 128;
+            break;
+        }
+        case armnn::DataType::QuantisedSymm16:
+        {
+            qScale = 0.1f;
+            qOffset = 0;
+            break;
+        }
+        case armnn::DataType::Float32:
+        default:
+        {
+            qScale = 0.f;
+            qOffset = 0;
+            break;
+        }
+    }
+
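+    // For Float32 the scale and offset are unused; they are still set below so that
+    // all data types share a single code path.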
+    inputTensorInfo.SetQuantizationScale(qScale);
+    inputTensorInfo.SetQuantizationOffset(qOffset);
+    kernelTensorInfo.SetQuantizationScale(qScale);
+    kernelTensorInfo.SetQuantizationOffset(qOffset);
+    outputTensorInfo.SetQuantizationScale(qScale);
+    outputTensorInfo.SetQuantizationOffset(qOffset);
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo,
+                                  std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                    inputTensorInfo.GetQuantizationOffset(),
+                                                                    inputNoQuantizedValues)));
+    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
+                                   std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
+                                                                     kernelTensorInfo.GetQuantizationOffset(),
+                                                                     kernelNoQuantizedValues)));
+    auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
+                                           std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                             outputTensorInfo.GetQuantizationOffset(),
+                                                                             outputExpectedNoQuantizedValues)));
+
+    uint32_t padLeft = 0;
+    uint32_t padTop = 0;
+    uint32_t padRight = 0;
+    uint32_t padBottom = 0;
+    uint32_t strideX  = 1;
+    uint32_t strideY  = 1;
+
+    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            input,
+            kernel,
+            GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
+            expectedOutput,
+            qScale,
+            qOffset,
+            layout,
+            padLeft,
+            padTop,
+            padRight,
+            padBottom,
+            strideX,
+            strideY,
+            dilationX,
+            dilationY);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+            {
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            };
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
+    std::vector<float> kernelNoQuantizedValues =
+            {
+                    1, 2, 3,
+                    4, 5, 6,
+                    7, 8, 9
+            };
+
+    // A dilation rate of 3 gives the 3x3 kernel an effective size of 7x7, so the output
+    // is 4x4: out = (I - Keff + 2P)/S + 1 = (10 - 7 + 0)/1 + 1 = 4.
+    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
+    std::vector<float> outputExpectedNoQuantizedValues =
+            {
+                    6., 5., 5., 5.,
+                    6., 5., 5., 5.,
+                    6., 5., 5., 5.,
+                    3., 2., 2., 2.
+            };
+
+    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            3,
+            3,
+            layout,
+            biasEnabled);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+            {
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            };
+
+    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
+    std::vector<float> kernelNoQuantizedValues =
+            {
+                    1, 2, 3,
+                    4, 5, 6,
+                    7, 8, 9,
+
+                    1, 2, 3,
+                    4, 5, 6,
+                    7, 8, 9
+            };
+
+    // A dilation rate of 3 gives the 3x3 kernel an effective size of 7x7, so each of the
+    // two channels produces a 4x4 output: out = (I - Keff + 2P)/S + 1 = (10 - 7 + 0)/1 + 1 = 4.
+    armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
+    std::vector<float> outputExpectedNoQuantizedValues =
+            {
+                    6., 5., 5., 5.,
+                    6., 5., 5., 5.,
+                    6., 5., 5., 5.,
+                    3., 2., 2., 2.,
+
+                    6., 5., 5., 5.,
+                    6., 5., 5., 5.,
+                    6., 5., 5., 5.,
+                    3., 2., 2., 2.
+            };
+
+    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            3,
+            3,
+            layout,
+            biasEnabled);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
+            armnn::IWorkloadFactory& workloadFactory,
+            const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+            bool biasEnabled,
+            const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+            {
+                    10.0, 10.0, 10.0,
+                    10.0, 10.0, 10.0,
+                    10.0, 10.0, 10.0,
+
+                    21.0, 22.0, 23.0,
+                    24.0, 25.0, 26.0,
+                    27.0, 28.0, 29.0
+            };
+
+    armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
+
+    std::vector<float> kernelNoQuantizedValues =
+            {
+                    0.25f, 0.25f,
+                    0.25f, 0.25f,
+
+                    0.25f, 0.25f,
+                    0.25f, 0.25f,
+
+                    0.0f , 0.0f,
+                    0.0f , 0.1f,
+
+                    0.0f , 0.0f,
+                    0.0f , 0.1f,
+
+                    0.2f , 0.0f,
+                    0.0f , 0.0f,
+
+                    0.2f , 0.0f,
+                    0.0f , 0.0f,
+
+                    0.0f , 0.3f,
+                    0.0f , 0.0f,
+
+                    0.0f , 0.3f,
+                    0.0f , 0.0f
+            };
+
+    armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
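+    // Values such as 2.6000001f below reflect 32-bit float rounding in the reference maths.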
+    std::vector<float> outputExpectedNoQuantizedValues =
+            {
+                    10.f, 10.f,
+                    10.f, 10.f,
+
+                    1.f, 1.f,
+                    1.f, 1.f,
+
+                    2.f, 2.f,
+                    2.f, 2.f,
+
+                    3.f, 3.f,
+                    3.f, 3.f,
+
+                    23.f, 24.f,
+                    26.f, 27.f,
+
+                    2.5f, 2.6000001f,
+                    2.8f, 2.9f,
+
+                    4.2000003f, 4.4f,
+                    4.8f, 5.f,
+
+                    6.6000004f, 6.9f,
+                    7.5000005f, 7.8f
+            };
+
+    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            1,
+            1,
+            layout,
+            biasEnabled);
+}
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
+LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
+            armnn::IWorkloadFactory& workloadFactory,
+            const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+            bool biasEnabled,
+            const armnn::DataLayout layout)
+{
+    armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
+    std::vector<float> inputNoQuantizedValues =
+            {
+                    10.0, 10.0, 10.0,
+                    10.0, 10.0, 10.0,
+                    10.0, 10.0, 10.0,
+
+                    21.0, 22.0, 23.0,
+                    24.0, 25.0, 26.0,
+                    27.0, 28.0, 29.0
+            };
+
+    armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
+
+    std::vector<float> kernelNoQuantizedValues =
+            {
+                    0.25f, 0.25f,
+                    0.25f, 0.25f,
+
+                    0.2f , 0.0f,
+                    0.0f , 0.0f,
+
+                    0.0f , 0.0f,
+                    0.0f , 0.1f,
+
+                    0.0f , 0.3f,
+                    0.0f , 0.0f
+            };
+
+    armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
+    std::vector<float> outputExpectedNoQuantizedValues =
+            {
+                    10.f, 10.f,
+                    10.f, 10.f,
+
+                    1.f, 1.f,
+                    1.f, 1.f,
+
+                    4.2000003f, 4.4f,
+                    4.8f, 5.f,
+
+                    6.6000004f, 6.9f,
+                    7.5000005f, 7.8f
+            };
+
+    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
+            workloadFactory,
+            memoryManager,
+            inputNoQuantizedValues,
+            inputTensorInfo,
+            kernelNoQuantizedValues,
+            kernelTensorInfo,
+            outputExpectedNoQuantizedValues,
+            outputTensorInfo,
+            1,
+            1,
+            layout,
+            biasEnabled);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    const armnnUtils::DataLayoutIndexed& layout)
+{
+    unsigned int inputHeight = 8;
+    unsigned int inputWidth = 16;
+    unsigned int inputChannels = 3;
+    unsigned int inputNum = 5;
+
+    unsigned int kernelHeight = 3;
+    unsigned int kernelWidth = 3;
+    unsigned int channelMultiplier = 1;
+
+    unsigned int strideX = 2;
+    unsigned int strideY = 3;
+    unsigned int padX = 1;
+    unsigned int padY = 1;
+
+    unsigned int outputNum = inputNum;
+    unsigned int outputChannels = inputChannels * channelMultiplier;
+    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
+    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
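+    // Integer form of out = floor((in + 2 * pad - kernel) / stride) + 1; adding the stride
+    // before dividing folds the "+ 1" into the integer division.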
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+    armnn::TensorInfo kernelDesc;
+    armnn::TensorInfo biasDesc;
+
+    std::vector<unsigned int> inputShape;
+    std::vector<unsigned int> outputShape;
+    std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
+    std::vector<unsigned int> biasShape{ outputChannels };
+    switch (layout.GetDataLayout())
+    {
+        case armnn::DataLayout::NCHW:
+            inputShape =  { inputNum, inputChannels, inputHeight, inputWidth };
+            outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
+            break;
+        case armnn::DataLayout::NHWC:
+            inputShape =  { inputNum, inputHeight, inputWidth, inputChannels };
+            outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
+            break;
+        default:
+            throw armnn::InvalidArgumentException("unknown data layout ["
+                                                  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
+    }
+
+    float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0.0f;
+    float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0.0f;
+    int32_t qOffset = 0;
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
+    kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
+    biasDesc = armnn::TensorInfo(
+        1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+
+    auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
+    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
+    auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
+            biasDesc, 1028, 0.0f, 255.0f);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::DepthwiseConvolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor;
+    data.m_Parameters.m_StrideX = strideX;
+    data.m_Parameters.m_StrideY = strideY;
+    data.m_Parameters.m_PadLeft = padX;
+    data.m_Parameters.m_PadRight = padX;
+    data.m_Parameters.m_PadTop = padY;
+    data.m_Parameters.m_PadBottom = padY;
+    data.m_Parameters.m_BiasEnabled = true;
+    data.m_Parameters.m_DataLayout = layout.GetDataLayout();
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
+
+    armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
+    armnn::WorkloadInfo refInfo = info;
+    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
+    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
+
+    outputHandleRef->Allocate();
+    inputHandleRef->Allocate();
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    workloadRef->PostAllocationConfigure();
+    workloadRef->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
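+    // outputExpected holds the reference workload's result rather than hard-coded data,
+    // so callers end up comparing the two workload factories against each other.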
+
+    return ret;
+}
+
+//
+// Explicit template instantiations
+//
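+// The template definitions above live in this .cpp rather than in a header, so every
+// type combination used by the unit tests must be explicitly instantiated here.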
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+    armnn::IWorkloadFactory&,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+    bool,
+    armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory&,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+    bool,
+    armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory&,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+    bool,
+    armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+    armnn::IWorkloadFactory&,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+    bool,
+    armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory&,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+    bool,
+    armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory&,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+    bool,
+    armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+    armnn::IWorkloadFactory &workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory &workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory &workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+        armnn::IWorkloadFactory&,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+        bool,
+        armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+        armnn::IWorkloadFactory&,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+        bool,
+        armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+        armnn::IWorkloadFactory&,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+        bool,
+        armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+        armnn::IWorkloadFactory&,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+        bool,
+        armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+        armnn::IWorkloadFactory&,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+        bool,
+        armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+        armnn::IWorkloadFactory&,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
+        bool,
+        armnn::DataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+        armnn::IWorkloadFactory &workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
+        armnn::IWorkloadFactory &workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout);
+
+//
+// Implementation functions
+//
+
+LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
+}
+
+LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled)
+{
+    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        biasEnabled,
+        armnn::DataLayout::NHWC);
+}
+
+LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        biasEnabled,
+        layout);
+}
+
+LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+            workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout layout)
+{
+    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory, memoryManager, layout, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout layout)
+{
+    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
+            <armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory, memoryManager, layout, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> Convolution1dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled)
+{
+    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
+}
+
+LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled)
+{
+    return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+            workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
+}
+
+LayerTestResult<float, 4> CompareConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory)
+{
+    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
+            workloadFactory, memoryManager, refWorkloadFactory);
+}
+
+LayerTestResult<float, 4> DepthwiseConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled)
+{
+    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
+}
+
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
+    auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
+
+    std::vector<float> kernelData;
+    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
+    for (unsigned int i = 0; i < 64; ++i)
+    {
+        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
+    }
+    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
+    auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
+
+    std::vector<float> expectedOutputData(64, 0.f);
+    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
+    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
+
+    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            input,
+            kernel,
+            boost::multi_array<float, 1>(),
+            expectedOutput,
+            0.f,
+            0,
+            armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
+        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
+}
+
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            0.f,
+            0,
+            false);
+}
+
+LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
+                armnn::IWorkloadFactory& workloadFactory,
+                const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+                bool biasEnabled,
+                const armnn::DataLayout layout)
+{
+    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
+        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
+}
+
+LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    const armnn::DataLayout layout)
+{
+    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, refWorkloadFactory, layout);
+}
+
+LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    const armnn::DataLayout layout)
+{
+    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, refWorkloadFactory, layout);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp
new file mode 100644 (file)
index 0000000..f5ff586
--- /dev/null
@@ -0,0 +1,218 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/Types.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+//
+// Convolution2d
+//
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
+    armnn::IWorkloadFactory &workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled,
+        const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout layout);
+
+LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout layout);
+
+LayerTestResult<float, 4> Convolution1dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+LayerTestResult<float, 4> CompareConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory);
+
+//
+// DepthwiseConvolution2d
+//
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+template<typename T>
+LayerTestResult<T, 4> CompareDepthwiseConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> DepthwiseConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    const armnn::DataLayout layout);
@@ -3,18 +3,14 @@
 // SPDX-License-Identifier: MIT
 //
 
-#pragma once
+#include "ConvertFp16ToFp32TestImpl.hpp"
 
-#include "WorkloadTestUtils.hpp"
+#include <Half.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
-
-#include <Half.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
diff --git a/src/backends/backendsCommon/test/layerTests/ConvertFp16ToFp32TestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ConvertFp16ToFp32TestImpl.hpp
new file mode 100644 (file)
index 0000000..8e77e26
--- /dev/null
@@ -0,0 +1,15 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> SimpleConvertFp16ToFp32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -3,22 +3,15 @@
 // SPDX-License-Identifier: MIT
 //
 
-#pragma once
-
-#include "WorkloadTestUtils.hpp"
-
-#include <Half.hpp>
+#include "ConvertFp32ToFp16TestImpl.hpp"
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
-
 LayerTestResult<armnn::Half, 4> SimpleConvertFp32ToFp16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
diff --git a/src/backends/backendsCommon/test/layerTests/ConvertFp32ToFp16TestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ConvertFp32ToFp16TestImpl.hpp
new file mode 100644 (file)
index 0000000..bb0fce2
--- /dev/null
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <Half.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<armnn::Half, 4> SimpleConvertFp32ToFp16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -2,17 +2,15 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
+#include "DebugTestImpl.hpp"
+
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
@@ -81,7 +79,7 @@ LayerTestResult<T, Dim> DebugTestImpl(
 }
 
 template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> Debug4DTest(
+LayerTestResult<T, 4> Debug4dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -134,7 +132,7 @@ LayerTestResult<T, 4> Debug4DTest(
 }
 
 template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> Debug3DTest(
+LayerTestResult<T, 3> Debug3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -185,7 +183,7 @@ LayerTestResult<T, 3> Debug3DTest(
 }
 
 template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> Debug2DTest(
+LayerTestResult<T, 2> Debug2dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -234,7 +232,7 @@ LayerTestResult<T, 2> Debug2DTest(
 }
 
 template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 1> Debug1DTest(
+LayerTestResult<T, 1> Debug1dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -281,3 +279,87 @@ LayerTestResult<T, 1> Debug1DTest(
 }
 
 } // anonymous namespace
+
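+// Non-template entry points for each tested data type; QuantisedAsymm8 resolves to uint8_t
+// and QuantisedSymm16 to int16_t.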
+LayerTestResult<float, 4> Debug4dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug4dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 3> Debug3dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug3dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> Debug2dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug2dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 1> Debug1dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug1dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> Debug4dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug4dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 3> Debug3dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug3dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> Debug2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug2dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 1> Debug1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug1dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> Debug4dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug4dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 3> Debug3dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug3dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 2> Debug2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug2dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 1> Debug1dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Debug1dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp
new file mode 100644 (file)
index 0000000..e355279
--- /dev/null
@@ -0,0 +1,59 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> Debug4dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Debug3dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> Debug2dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 1> Debug1dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Debug4dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Debug3dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> Debug2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 1> Debug1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> Debug4dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 3> Debug3dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> Debug2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 1> Debug1dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -2,17 +2,15 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
+#include "DequantizeTestImpl.hpp"
+
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
@@ -130,3 +128,24 @@ LayerTestResult<float, 4> DequantizeOffsetTest(
 }
 
 } // anonymous namespace
+
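+// Dequantization always produces a Float32 tensor, regardless of the quantized input type.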
+LayerTestResult<float, 4> DequantizeSimpleUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return DequantizeSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> DequantizeOffsetUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return DequantizeOffsetTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> DequantizeSimpleInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return DequantizeSimpleTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/DequantizeTestImpl.hpp
new file mode 100644 (file)
index 0000000..55ea4b4
--- /dev/null
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> DequantizeSimpleUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> DequantizeOffsetUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> DequantizeSimpleInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -4,15 +4,18 @@
 //
 #pragma once
 
-#include "TensorCopyUtils.hpp"
 #include <ResolveType.hpp>
-#include "WorkloadTestUtils.hpp"
 
 #include <armnn/Types.hpp>
+
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/IBackendInternal.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
 #include <backendsCommon/test/WorkloadFactoryHelper.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
 #include <test/TensorHelpers.hpp>
 
 namespace
@@ -363,4 +366,4 @@ void DetectionPostProcessFastNmsQuantizedTest()
         FastNmsExpectedResults::s_DetectionScores,
         FastNmsExpectedResults::s_NumDetections,
         false);
-}
\ No newline at end of file
+}
diff --git a/src/backends/backendsCommon/test/layerTests/FakeQuantizationTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/FakeQuantizationTestImpl.cpp
new file mode 100644 (file)
index 0000000..1ce9d2d
--- /dev/null
@@ -0,0 +1,74 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "FakeQuantizationTestImpl.hpp"
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+LayerTestResult<float, 2> FakeQuantizationTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    constexpr unsigned int width = 2;
+    constexpr unsigned int height = 3;
+
+    const armnn::TensorInfo tensorInfo({ height, width }, armnn::DataType::Float32);
+
+    auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
+       -10.0f, -5.0f,
+         0.0f,  5.0f,
+        10.0f, 10.0f
+    }));
+
+    LayerTestResult<float, 2> ret(tensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle   = workloadFactory.CreateTensorHandle(tensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle  = workloadFactory.CreateTensorHandle(tensorInfo);
+
+    armnn::FakeQuantizationQueueDescriptor data;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
+
+    float min = -10.f;
+    float max =  10.f;
+
+    data.m_Parameters.m_Min = min;
+    data.m_Parameters.m_Max = max;
+
+    armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]);
+    armnn::FakeQuantizationQueueDescriptor refData = data;
+    armnn::WorkloadInfo refInfo = info;
+    SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
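+    // With m_Min = -10 and m_Max = 10 the input range is mapped linearly onto the quantized
+    // range [0, 255]: -10.0f -> 0.0f, 0.0f -> 128.0f and 10.0f -> 255.0f.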
+    ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
+        0.0f,     63.0f,
+        128.0f,   191.0f,
+        255.0f,   255.0f
+    }));
+
+    return ret;
+}
diff --git a/src/backends/backendsCommon/test/layerTests/FakeQuantizationTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/FakeQuantizationTestImpl.hpp
new file mode 100644 (file)
index 0000000..506e968
--- /dev/null
@@ -0,0 +1,15 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 2> FakeQuantizationTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/FloorTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/FloorTestImpl.cpp
new file mode 100644 (file)
index 0000000..f97d51a
--- /dev/null
@@ -0,0 +1,70 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "FloorTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> SimpleFloorTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+
+    armnn::TensorInfo outputTensorInfo(inputTensorInfo);
+    outputTensorInfo.SetQuantizationScale(0.1f);
+
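+    // Floor rounds each element towards negative infinity, e.g. -1.5f -> -2.0f and 1.5f -> 1.0f.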
+    auto input = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(
+        { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f,
+        1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f },
+        inputTensorInfo));
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, ConvertToDataType<ArmnnType>(
+        { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f,
+        1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f },
+        outputTensorInfo));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::FloorQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFloor(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+//
+// Explicit template instantiations
+//
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+SimpleFloorTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+SimpleFloorTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/FloorTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/FloorTestImpl.hpp
new file mode 100644 (file)
index 0000000..e5baf5d
--- /dev/null
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleFloorTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp
new file mode 100644 (file)
index 0000000..c84b941
--- /dev/null
@@ -0,0 +1,349 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "FullyConnectedTestImpl.hpp"
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+//
+// Implementation templates
+//
+
+template<typename T, typename B>
+LayerTestResult<T, 2> SimpleFullyConnectedTestImpl(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        armnn::TensorInfo inputTensorInfo,
+        armnn::TensorInfo outputTensorInfo,
+        armnn::TensorInfo weightsDesc,
+        armnn::TensorInfo biasesDesc,
+        boost::multi_array<T, 2>& weights,
+        boost::multi_array<B, 1>& bias,
+        boost::multi_array<T, 4>& input,
+        bool biasEnabled,
+        bool transposeWeights)
+{
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::FullyConnectedQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle weightsTensor(weightsDesc);
+    armnn::ScopedCpuTensorHandle biasTensor(biasesDesc);
+
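+    // Weights and biases are passed to the workload as constant (scoped) tensor handles.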
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, &weights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Weight = &weightsTensor;
+    data.m_Bias = &biasTensor;
+    data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_TransposeWeightMatrix = transposeWeights;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFullyConnected(data, info);
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> FullyConnectedTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled)
+{
+    constexpr static unsigned int inputWidth = 3u;
+    constexpr static unsigned int inputHeight = 2u;
+    constexpr static unsigned int inputChannels = 1u;
+
+    constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels;
+
+    constexpr static unsigned int outputChannels = 2u;
+
+    armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+    inputTensorInfo.SetQuantizationOffset(63);
+
+    armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(5.f);
+    outputTensorInfo.SetQuantizationOffset(biasEnabled ? -50 : 10);
+
+    armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, ArmnnType);
+    weightsDesc.SetQuantizationScale(0.2f);
+    weightsDesc.SetQuantizationOffset(93);
+
+    armnn::TensorInfo biasesDesc({ outputChannels }, GetBiasTypeFromWeightsType(weightsDesc.GetDataType()).value());
+    biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale());
+    biasesDesc.SetQuantizationOffset(0);
+
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(
+        {
+            -1.2f, 6.1f, -3.5f,
+            18.8f, -5.5f, 2.9f
+        },
+        inputTensorInfo));
+
+    auto weights = MakeTensor<T, 2>(weightsDesc, ConvertToDataType<ArmnnType>(
+        {
+            -8.4f, 20.0f, -10.4f, -8.0f, 16.4f, -11.8f,
+            23.4f, 10.4f, -14.0f, -3.8f, -11.8f, 11.4f
+        },
+        weightsDesc));
+
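+    // The bias is supplied as raw int32 values, already quantized with scale = inputScale * weightsScale.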
+    auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500});
+
+    result = SimpleFullyConnectedTestImpl<T>(
+            workloadFactory,
+            memoryManager,
+            inputTensorInfo, outputTensorInfo,
+            weightsDesc, biasesDesc,
+            weights, bias, input,
+            biasEnabled, true
+    );
+
+    if (biasEnabled)
+    {
+        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
+                                                 ConvertToDataType<ArmnnType>({80.f, 1460.f}, outputTensorInfo));
+    }
+    else
+    {
+        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
+                                                 ConvertToDataType<ArmnnType>({-107.04f, 110.f}, outputTensorInfo));
+    }
+
+    return result;
+}
+
+//
+// ArmNN variant of the AndroidNN fully_connected_float_large test.
+//
+// Tests the fully connected layer with large values, optionally transposing weights.
+// Note: this is templated for consistency, but the nature of this test makes it unlikely to be useful in Uint8 mode.
+//
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> FullyConnectedLargeTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool transposeWeights,
+    float qScale = 0.0f,
+    int32_t qOffset = 0)
+{
+    unsigned int inputWidth = 1;
+    unsigned int inputHeight = 1;
+    unsigned int inputChannels = 5;
+    unsigned int inputNum = 1;
+
+    unsigned int outputChannels = 1;
+    unsigned int outputNum = 1;
+
+    // Define the tensor descriptors.
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+    armnn::TensorInfo weightsDesc;
+    armnn::TensorInfo biasesDesc;
+
+    unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth };
+    unsigned int outputShape[] = { outputNum, outputChannels };
+    unsigned int weightsShape[] = { inputChannels, outputChannels };
+    if (transposeWeights)
+    {
+        std::swap(weightsShape[0], weightsShape[1]);
+    }
+
+    unsigned int biasShape[] = { outputChannels };
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(2, outputShape, ArmnnType);
+    weightsDesc = armnn::TensorInfo(2, weightsShape, ArmnnType);
+    biasesDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset, {
+            1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f,
+        })
+    );
+
+    boost::multi_array<T, 2> weights = MakeTensor<T, 2>(weightsDesc,
+        QuantizedVector<T>(qScale, qOffset, {
+            2.0f, 3.0f, 4.0f, 5.0f, 6.0f
+        })
+    );
+
+    std::vector<T> biasValues({900000.f});
+    boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasesDesc, biasValues);
+
+    result = SimpleFullyConnectedTestImpl<T>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo, outputTensorInfo,
+        weightsDesc, biasesDesc,
+        weights, bias, input,
+        true, transposeWeights
+    );
+
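+    // Expected result: 1*2 + 10*3 + 100*4 + 1000*5 + 10000*6 + 900000 = 965432.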
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset, {
+            965432.0f,
+        })
+    );
+
+    return result;
+}
+
+//
+// Explicit template instantiations
+//
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 2>
+FullyConnectedTest<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 2>
+FullyConnectedTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+//
+// Implementation functions
+//
+
+LayerTestResult<float, 2> FullyConnectedFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    bool transposeWeights)
+{
+    unsigned int inputWidth = 1;
+    unsigned int inputHeight = 1;
+    unsigned int inputChannels = 5;
+    unsigned int inputNum = 2;
+
+    unsigned int outputChannels = 3;
+    unsigned int outputNum = 2;
+
+    // Define the tensor descriptors.
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+    armnn::TensorInfo weightsDesc;
+    armnn::TensorInfo biasesDesc;
+
+    unsigned int inputShape[]   = { inputNum, inputChannels, inputHeight, inputWidth };
+    unsigned int outputShape[]  = { outputNum, outputChannels };
+    unsigned int weightsShape[] = { inputChannels, outputChannels };
+
+    if (transposeWeights)
+    {
+        std::swap(weightsShape[0], weightsShape[1]);
+    }
+
+    unsigned int biasShape[] = { outputChannels };
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32);
+    weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32);
+    biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32);
+
+    LayerTestResult<float, 2> result(outputTensorInfo);
+
+    boost::multi_array<float, 4> input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(
+        {
+            1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+
+            5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+        })
+    );
+
+    boost::multi_array<float, 2> weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>(
+        {
+            .5f, 2.f, .5f,
+            .5f, 2.f, 1.f,
+            .5f, 2.f, 2.f,
+            .5f, 2.f, 3.f,
+            .5f, 2.f, 4.f
+        }));
+
+    if (transposeWeights)
+    {
+        weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>(
+        {
+            .5f, .5f, .5f, .5f, .5f,
+            2.f, 2.f, 2.f, 2.f, 2.f,
+            .5f, 1.f, 2.f, 3.f, 4.f
+        }));
+    }
+
+    std::vector<float> biasValues({0.f, 0.f, 0.f});
+    if (biasEnabled)
+    {
+        biasValues =  std::vector<float>({10.f, 20.f, 30.f});
+    }
+    boost::multi_array<float, 1> bias = MakeTensor<float, 1>(biasesDesc, biasValues);
+
+    result = SimpleFullyConnectedTestImpl<float>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo, outputTensorInfo,
+        weightsDesc, biasesDesc,
+        weights, bias, input,
+        biasEnabled, transposeWeights
+    );
+
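+    // Each expected value is the dot product of an input row with a weight column, plus the bias.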
+    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, std::vector<float>(
+        {
+            0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0],
+            2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1],
+            0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2],
+
+            2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0],
+            10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1],
+            2.5f + 4.0f + 6.0f + 6.f + 4.f   + biasValues[2]
+        })
+    );
+
+    return result;
+}
+
+LayerTestResult<float, 2> FullyConnectedLargeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool transposeWeights)
+{
+    return FullyConnectedLargeTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, transposeWeights);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.hpp
new file mode 100644 (file)
index 0000000..8a2463c
--- /dev/null
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> FullyConnectedTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled);
+
+LayerTestResult<float, 2> FullyConnectedFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool biasEnabled,
+    bool transposeWeights);
+
+LayerTestResult<float, 2> FullyConnectedLargeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool transposeWeights);
diff --git a/src/backends/backendsCommon/test/layerTests/GatherTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/GatherTestImpl.cpp
new file mode 100644 (file)
index 0000000..0118f54
--- /dev/null
@@ -0,0 +1,251 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "GatherTestImpl.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template <armnn::DataType ArmnnType,
+          typename T = armnn::ResolveType<ArmnnType>,
+          size_t ParamsDim,
+          size_t IndicesDim,
+          size_t OutputDim>
+LayerTestResult<T, OutputDim> GatherTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorInfo& paramsInfo,
+    const armnn::TensorInfo& indicesInfo,
+    const armnn::TensorInfo& outputInfo,
+    const std::vector<T>& paramsData,
+    const std::vector<int32_t>& indicesData,
+    const std::vector<T>& outputData)
+{
+    auto params  = MakeTensor<T, ParamsDim>(paramsInfo, paramsData);
+    auto indices = MakeTensor<int32_t, IndicesDim>(indicesInfo, indicesData);
+
+    LayerTestResult<T, OutputDim> result(outputInfo);
+    result.outputExpected = MakeTensor<T, OutputDim>(outputInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> paramsHandle = workloadFactory.CreateTensorHandle(paramsInfo);
+    std::unique_ptr<armnn::ITensorHandle> indicesHandle = workloadFactory.CreateTensorHandle(indicesInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
+
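+    // The Gather workload consumes two inputs: the params tensor and a Signed32 indices tensor.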
+    armnn::GatherQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data,  info, paramsInfo, paramsHandle.get());
+    AddInputToWorkload(data, info, indicesInfo, indicesHandle.get());
+    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateGather(data, info);
+
+    paramsHandle->Allocate();
+    indicesHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(paramsHandle.get(), params.origin());
+    CopyDataToITensorHandle(indicesHandle.get(), indices.origin());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+
+template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 1> Gather1dParamsTestImpl(armnn::IWorkloadFactory& workloadFactory,
+                                             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo paramsInfo({ 8 }, ArmnnType);
+    armnn::TensorInfo indicesInfo({ 4 }, armnn::DataType::Signed32);
+    armnn::TensorInfo outputInfo({ 4 }, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        paramsInfo.SetQuantizationScale(1.0f);
+        paramsInfo.SetQuantizationOffset(1);
+        outputInfo.SetQuantizationScale(1.0f);
+        outputInfo.SetQuantizationOffset(1);
+    }
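+
+    // Indices { 0, 2, 1, 5 } select elements 1, 3, 2 and 6 from the params tensor below.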
+    const std::vector<T> params         = std::vector<T>({ 1, 2, 3, 4, 5, 6, 7, 8 });
+    const std::vector<int32_t> indices  = std::vector<int32_t>({ 0, 2, 1, 5 });
+    const std::vector<T> expectedOutput = std::vector<T>({ 1, 3, 2, 6 });
+
+    return GatherTestImpl<ArmnnType, T, 1, 1, 1>(
+        workloadFactory,
+        memoryManager,
+        paramsInfo,
+        indicesInfo,
+        outputInfo,
+        params,
+        indices,
+        expectedOutput);
+}
+
+template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> GatherMultiDimParamsTestImpl(
+    armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo paramsInfo({ 5, 2 }, ArmnnType);
+    armnn::TensorInfo indicesInfo({ 3 }, armnn::DataType::Signed32);
+    armnn::TensorInfo outputInfo({ 3, 2 }, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        paramsInfo.SetQuantizationScale(1.0f);
+        paramsInfo.SetQuantizationOffset(1);
+        outputInfo.SetQuantizationScale(1.0f);
+        outputInfo.SetQuantizationOffset(1);
+    }
+
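+    // With params of shape { 5, 2 }, each index selects an entire row of two elements.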
+    const std::vector<T> params         = std::vector<T>({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
+    const std::vector<int32_t> indices  = std::vector<int32_t>({ 1, 3, 4 });
+    const std::vector<T> expectedOutput = std::vector<T>({ 3, 4, 7, 8, 9, 10 });
+
+    return GatherTestImpl<ArmnnType, T, 2, 1, 2>(
+        workloadFactory,
+        memoryManager,
+        paramsInfo,
+        indicesInfo,
+        outputInfo,
+        params,
+        indices,
+        expectedOutput);
+}
+
+template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> GatherMultiDimParamsMultiDimIndicesTestImpl(
+    armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo paramsInfo({ 3, 2, 3}, ArmnnType);
+    armnn::TensorInfo indicesInfo({ 2, 3 }, armnn::DataType::Signed32);
+    armnn::TensorInfo outputInfo({ 2, 3, 2, 3 }, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        paramsInfo.SetQuantizationScale(1.0f);
+        paramsInfo.SetQuantizationOffset(1);
+        outputInfo.SetQuantizationScale(1.0f);
+        outputInfo.SetQuantizationOffset(1);
+    }
+
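+    // The output shape is the indices shape { 2, 3 } followed by the trailing params dimensions { 2, 3 }.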
+    const std::vector<T> params =
+    {
+         1,  2,  3,
+         4,  5,  6,
+
+         7,  8,  9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    };
+
+    const std::vector<int32_t> indices = { 1, 2, 1, 2, 1, 0 };
+
+    const std::vector<T> expectedOutput =
+    {
+         7,  8,  9,
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18,
+         7,  8,  9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18,
+         7,  8,  9,
+        10, 11, 12,
+         1,  2,  3,
+         4,  5,  6
+    };
+
+    return GatherTestImpl<ArmnnType, T, 3, 2, 4>(
+        workloadFactory,
+        memoryManager,
+        paramsInfo,
+        indicesInfo,
+        outputInfo,
+        params,
+        indices,
+        expectedOutput);
+}
+
+} // anonymous namespace
+
+LayerTestResult<float, 1> Gather1dParamsFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Gather1dParamsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 1> Gather1dParamsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Gather1dParamsTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 1> Gather1dParamsInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Gather1dParamsTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> GatherMultiDimParamsFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return GatherMultiDimParamsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> GatherMultiDimParamsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return GatherMultiDimParamsTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 2> GatherMultiDimParamsInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return GatherMultiDimParamsTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> GatherMultiDimParamsMultiDimIndicesFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> GatherMultiDimParamsMultiDimIndicesUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> GatherMultiDimParamsMultiDimIndicesInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedSymm16>(
+        workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/GatherTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/GatherTestImpl.hpp
new file mode 100644 (file)
index 0000000..fd12e61
--- /dev/null
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 1> Gather1dParamsFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 1> Gather1dParamsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 1> Gather1dParamsInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> GatherMultiDimParamsFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> GatherMultiDimParamsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> GatherMultiDimParamsInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> GatherMultiDimParamsMultiDimIndicesFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> GatherMultiDimParamsMultiDimIndicesUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> GatherMultiDimParamsMultiDimIndicesInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/L2NormalizationTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/L2NormalizationTestImpl.cpp
new file mode 100644 (file)
index 0000000..5c75b6f
--- /dev/null
@@ -0,0 +1,821 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "L2NormalizationTestImpl.hpp"
+
+#include <Permute.hpp>
+#include <ResolveType.hpp>
+#include <TensorUtils.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> L2NormalizationTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorShape& inputOutputTensorShape,
+    float scale,
+    int32_t offset,
+    const std::vector<float>& inputValues,
+    float outScale,
+    int32_t outOffset,
+    const std::vector<float>& expectedOutputValues,
+    const armnn::DataLayout layout,
+    float epsilon = 1e-12f)
+{
+    const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, ArmnnType, scale, offset);
+    const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, ArmnnType, outScale, outOffset);
+
+    // At this point, permute the input data if the test requires NHWC layout.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    std::vector<float> inputData = inputValues;
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+    }
+
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
+                                                         inputTensorInfo.GetQuantizationScale(),
+                                                         inputTensorInfo.GetQuantizationOffset(),
+                                                         inputData));
+
+    std::vector<float> expectedOutputData = expectedOutputValues;
+    if (layout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(expectedOutputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, expectedOutputData.data(), tmp.data(),
+                            sizeof(float));
+        expectedOutputData = tmp;
+    }
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
+                                                               outputTensorInfo.GetQuantizationScale(),
+                                                               outputTensorInfo.GetQuantizationOffset(),
+                                                               expectedOutputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::L2NormalizationQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Eps = epsilon;
+    descriptor.m_Parameters.m_DataLayout = layout;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
+
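+// Returns the reciprocal of the L2 norm, i.e. 1 / sqrt(sum of squared elements).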
+float CalcInvL2Norm(std::initializer_list<float> elements)
+{
+    const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f,
+        [](float acc, float element) { return acc + element * element; });
+    return 1.0f / sqrtf(reduction);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> L2NormalizationEpsilonTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float scale,
+        int32_t offset,
+        float outScale,
+        int32_t outOffset,
+        const armnn::DataLayout layout,
+        float epsilon)
+{
+    // Width: 1
+    // Height: 1
+    // Channels: 3
+    // BatchSize: 1
+    unsigned int numberOfBatches = 1;
+    unsigned int numberOfChannels = 3;
+    unsigned int height = 1;
+    unsigned int width = 1;
+
+    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
+            numberOfBatches, numberOfChannels, height, width, layout);
+
+    // 0.00000001^2 + 0.00000002^2 + 0.00000003^2 < 1e-12
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (1) x Width (1)
+        0.00000001f,
+
+        // Batch 0, Channel 1, Height (1) x Width (1)
+        0.00000002f,
+
+        // Batch 0, Channel 2, Height (1) x Width (1)
+        0.00000003f,
+    };
+
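+    // As the sum of squares is below epsilon, the normalization denominator is clamped to sqrt(epsilon).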
+    const float approxInvL2Norm = 1.f / sqrtf(epsilon);
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (1) x Width (1)
+        0.00000001f * approxInvL2Norm,
+        0.00000002f * approxInvL2Norm,
+        0.00000003f * approxInvL2Norm,
+    };
+
+    return L2NormalizationTestImpl<ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        scale,
+        offset,
+        inputValues,
+        outScale,
+        outOffset,
+        expectedOutputValues,
+        layout,
+        epsilon);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> L2Normalization1dTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float scale,
+        int32_t offset,
+        float outScale,
+        int32_t outOffset,
+        const armnn::DataLayout layout)
+{
+    // Width: 1
+    // Height: 1
+    // Channels: 10
+    // BatchSize: 1
+    unsigned int numberOfBatches = 1;
+    unsigned int numberOfChannels = 10;
+    unsigned int height = 1;
+    unsigned int width = 1;
+
+    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
+            numberOfBatches, numberOfChannels, height, width, layout);
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (1) x Width (1)
+        1.0f,
+
+        // Batch 0, Channel 1, Height (1) x Width (1)
+        2.0f,
+
+        // Batch 0, Channel 2, Height (1) x Width (1)
+        3.0f,
+
+        // Batch 0, Channel 3, Height (1) x Width (1)
+        4.0f,
+
+        // Batch 0, Channel 4, Height (1) x Width (1)
+        5.0f,
+
+        // Batch 0, Channel 5, Height (1) x Width (1)
+        6.0f,
+
+        // Batch 0, Channel 6, Height (1) x Width (1)
+        7.0f,
+
+        // Batch 0, Channel 7, Height (1) x Width (1)
+        8.0f,
+
+        // Batch 0, Channel 8, Height (1) x Width (1)
+        9.0f,
+
+        // Batch 0, Channel 9, Height (1) x Width (1)
+        10.0f
+    };
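+    // 1 / sqrt(1^2 + 2^2 + ... + 10^2) = 1 / sqrt(385) ≈ 0.050964719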
+    const float approxInvL2Norm = 0.050964719f;
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (1) x Width (1)
+        1.0f * approxInvL2Norm,
+        2.0f * approxInvL2Norm,
+        3.0f * approxInvL2Norm,
+        4.0f * approxInvL2Norm,
+        5.0f * approxInvL2Norm,
+        6.0f * approxInvL2Norm,
+        7.0f * approxInvL2Norm,
+        8.0f * approxInvL2Norm,
+        9.0f * approxInvL2Norm,
+        10.0f * approxInvL2Norm
+    };
+
+    return L2NormalizationTestImpl<ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        scale,
+        offset,
+        inputValues,
+        outScale,
+        outOffset,
+        expectedOutputValues,
+        layout);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> L2Normalization2dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float scale,
+    int32_t offset,
+    float outScale,
+    int32_t outOffset,
+    const armnn::DataLayout layout)
+{
+    // Width: 5
+    // Height: 1
+    // Channels: 2
+    // BatchSize: 1
+    unsigned int numberOfBatches = 1;
+    unsigned int numberOfChannels = 2;
+    unsigned int height = 1;
+    unsigned int width = 5;
+
+    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
+            numberOfBatches, numberOfChannels, height, width, layout);
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (1) x Width (5)
+        1.0f, 3.0f, 5.0f, 7.0f,  9.0f,
+
+        // Batch 0, Channel 1, Height (1) x Width (5)
+        2.0f, 4.0f, 6.0f, 8.0f, 10.0f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (1) x Width (5)
+        1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
+        3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
+        5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
+        7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
+        9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
+
+        // Batch 0, Channel 1, Height (1) x Width (5)
+        2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
+        4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
+        6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
+        8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
+        10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
+    };
+
+    return L2NormalizationTestImpl<ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        scale,
+        offset,
+        inputValues,
+        outScale,
+        outOffset,
+        expectedOutputValues,
+        layout);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> L2Normalization3dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float scale,
+    int32_t offset,
+    float outScale,
+    int32_t outOffset,
+    const armnn::DataLayout layout)
+{
+    // Width: 3
+    // Height: 4
+    // Channels: 2
+    // BatchSize: 1
+    unsigned int numberOfBatches = 1;
+    unsigned int numberOfChannels = 2;
+    unsigned int height = 4;
+    unsigned int width = 3;
+
+    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
+            numberOfBatches, numberOfChannels, height, width, layout);
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (4) x Width (3)
+        119.0f,  21.0f, 150.0f,
+        149.0f,  32.0f, 179.0f,
+        15.0f, 227.0f, 141.0f,
+        147.0f, 199.0f, 220.0f,
+
+        // Batch 0, Channel 1, Height (4) x Width (3)
+        110.0f, 140.0f,  73.0f,
+        211.0f, 212.0f,  89.0f,
+        24.0f, 138.0f, 188.0f,
+        162.0f,  12.0f, 161.0f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (4) x Width (3)
+        119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
+        21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
+        150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
+        149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
+        32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
+        179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
+        15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
+        227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
+        141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
+        147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
+        199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
+        220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
+
+        // Batch 0, Channel 1, Height (4) x Width (3)
+        110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
+        140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
+        73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
+        211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
+        212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
+        89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
+        24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
+        138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
+        188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
+        162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
+        12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
+        161.0f * CalcInvL2Norm({ 220.0f, 161.0f })
+    };
+
+    return L2NormalizationTestImpl<ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        scale,
+        offset,
+        inputValues,
+        outScale,
+        outOffset,
+        expectedOutputValues,
+        layout);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> L2Normalization4dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float scale,
+    int32_t offset,
+    float outScale,
+    int32_t outOffset,
+    const armnn::DataLayout layout)
+{
+    // Width: 3
+    // Height: 4
+    // Channels: 3
+    // BatchSize: 2
+    unsigned int numberOfBatches = 2;
+    unsigned int numberOfChannels = 3;
+    unsigned int height = 4;
+    unsigned int width = 3;
+
+    const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
+            numberOfBatches, numberOfChannels, height, width, layout);
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (4) x Width (3)
+        235.0f,  46.0f, 178.0f,
+        100.0f, 123.0f,  19.0f,
+        172.0f,  74.0f, 250.0f,
+        6.0f, 195.0f,  80.0f,
+
+        // Batch 0, Channel 1, Height (4) x Width (3)
+        113.0f,  95.0f, 202.0f,
+        77.0f, 114.0f,  71.0f,
+        122.0f, 246.0f, 166.0f,
+        82.0f,  28.0f,  37.0f,
+
+        // Batch 0, Channel 2, Height (4) x Width (3)
+        56.0f, 170.0f, 162.0f,
+        194.0f,  89.0f, 254.0f,
+        12.0f, 209.0f, 200.0f,
+        1.0f,  64.0f,  54.0f,
+
+        // Batch 1, Channel 0, Height (4) x Width (3)
+        67.0f,  90.0f,  49.0f,
+        7.0f, 163.0f,  18.0f,
+        25.0f, 117.0f, 103.0f,
+        247.0f,  59.0f, 189.0f,
+
+        // Batch 1, Channel 1, Height (4) x Width (3)
+        239.0f, 104.0f, 199.0f,
+        17.0f, 124.0f, 153.0f,
+        222.0f, 217.0f, 75.0f,
+        32.0f, 126.0f, 21.0f,
+
+        // Batch 1, Channel 2, Height (4) x Width (3)
+        97.0f, 145.0f, 215.0f,
+        115.0f, 116.0f, 238.0f,
+        226.0f,  16.0f, 132.0f,
+        92.0f, 125.0f,  88.0f
+    };
+    std::vector<float> expectedOutputValues
+    {
+        // Batch 0, Channel 0, Height (4) x Width (3)
+        235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
+        46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
+        178.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
+        100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
+        123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
+        19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
+        172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
+        74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
+        250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
+        6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
+        195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
+        80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
+
+        // Batch 0, Channel 1, Height (4) x Width (3)
+        113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
+        95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
+        202.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
+        77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
+        114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
+        71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
+        122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
+        246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
+        166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
+        82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
+        28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
+        37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
+
+        // Batch 0, Channel 2, Height (4) x Width (3)
+        56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
+        170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
+        162.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
+        194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
+        89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
+        254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
+        12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
+        209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
+        200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
+        1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
+        64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
+        54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
+
+        // Batch 1, Channel 0, Height (4) x Width (3)
+        67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
+        90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
+        49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
+        7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
+        163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
+        18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
+        25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
+        117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
+        103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
+        247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
+        59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
+        189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
+
+        // Batch 1, Channel 1, Height (4) x Width (3)
+        239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
+        104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
+        199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
+        17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
+        124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
+        153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
+        222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
+        217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
+        75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
+        32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
+        126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
+        21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
+
+        // Batch 1, Channel 2, Height (4) x Width (3)
+        97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
+        145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
+        215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
+        115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
+        116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
+        238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
+        226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
+        16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
+        132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
+        92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
+        125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
+        88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f })
+    };
+
+    return L2NormalizationTestImpl<ArmnnType>(
+        workloadFactory,
+        memoryManager,
+        inputOutputShape,
+        scale,
+        offset,
+        inputValues,
+        outScale,
+        outOffset,
+        expectedOutputValues,
+        layout);
+}
+
+} // anonymous namespace
+
+LayerTestResult<float, 4> L2NormalizationDefaultEpsilonTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout layout)
+{
+    // Dummy descriptor to get the default value of epsilon.
+    armnn::L2NormalizationDescriptor descriptor;
+
+    return L2NormalizationEpsilonTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        0.f,
+        0,
+        layout,
+        descriptor.m_Eps);
+}
+
+LayerTestResult<float, 4> L2NormalizationNonDefaultEpsilonTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout layout)
+{
+    return L2NormalizationEpsilonTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        0.f,
+        0,
+        layout,
+        1e-9f);
+}
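
Both epsilon tests push the same data through L2NormalizationEpsilonTestCommon; they differ only in whether the descriptor's default m_Eps or an explicit 1e-9f is passed. Epsilon exists so that near-zero vectors do not divide by zero. A hedged sketch of the per-element arithmetic, assuming the reference backend clamps the sum of squares with epsilon:

    #include <algorithm>
    #include <cmath>

    // Sketch only: how epsilon is assumed to enter the normalisation.
    float L2NormalizeElement(float value, float sumOfSquares, float epsilon)
    {
        return value / std::sqrt(std::max(sumOfSquares, epsilon));
    }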
+
+LayerTestResult<float, 4> L2Normalization1dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization1dTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        0.f,
+        0,
+        layout);
+}
+
+LayerTestResult<int16_t, 4> L2Normalization1dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization1dTestCommon<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f,
+        0,
+        layout);
+}
+
+LayerTestResult<uint8_t, 4> L2Normalization1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization1dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f / 128,
+        128,
+        layout);
+}
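
The Uint8 variants use an output scale of 1/128 with offset 128, which maps the normalised output range [-1, 1] onto the full uint8 range. A worked illustration of that mapping, with quantised = round(real / scale) + offset:

    // real = -1.0f  ->  round(-128.0f) + 128 =   0
    // real =  0.0f  ->  round(   0.0f) + 128 = 128
    // real = +1.0f  ->  round( 128.0f) + 128 = 256, clamped to 255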
+
+LayerTestResult<float, 4> L2Normalization2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization2dTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        0.f,
+        0,
+        layout);
+}
+
+LayerTestResult<int16_t, 4> L2Normalization2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization2dTestCommon<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f,
+        0,
+        layout);
+}
+
+LayerTestResult<uint8_t, 4> L2Normalization2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization2dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f / 128,
+        128,
+        layout);
+}
+
+LayerTestResult<float, 2> L2Normalization2dShapeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const armnn::DataLayout layout = armnn::DataLayout::NHWC;
+    const armnn::TensorShape inputOutputTensorShape = armnn::TensorShape({ 5, 2 });
+
+    std::vector<float> inputData
+    {
+        1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f
+    };
+    std::vector<float> expectedOutputData
+    {
+        1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
+        2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
+        3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
+        4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
+        5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
+        6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
+        7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
+        8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
+        9.0f  * CalcInvL2Norm({ 9.0f, 10.0f }),
+        10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
+    };
+
+    const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
+    const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
+
+    auto inputTensor = MakeTensor<float, 2>(inputTensorInfo, QuantizedVector<float>(
+                                                             inputTensorInfo.GetQuantizationScale(),
+                                                             inputTensorInfo.GetQuantizationOffset(),
+                                                             inputData));
+
+    LayerTestResult<float, 2> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, QuantizedVector<float>(
+                                                                   outputTensorInfo.GetQuantizationScale(),
+                                                                   outputTensorInfo.GetQuantizationOffset(),
+                                                                   expectedOutputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::L2NormalizationQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Eps = 1e-12f;
+    descriptor.m_Parameters.m_DataLayout = layout;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
+
+    return result;
+}
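
L2Normalization2dShapeTest cannot reuse L2NormalizationTestImpl, which produces 4D results, so it drives the workload by hand using the pattern common to these tests: create tensor handles from the factory, fill in the queue descriptor, register the handles with AddInputToWorkload and AddOutputToWorkload, allocate, copy the input in, execute, and copy the output back.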
+
+LayerTestResult<float, 4> L2Normalization3dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization3dTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        0.f,
+        0,
+        layout);
+}
+
+LayerTestResult<int16_t, 4> L2Normalization3dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization3dTestCommon<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f,
+        0,
+        layout);
+}
+
+LayerTestResult<uint8_t, 4> L2Normalization3dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization3dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f / 128,
+        128,
+        layout);
+}
+
+LayerTestResult<float, 4> L2Normalization4dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization4dTestCommon<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        0.f,
+        0,
+        0.f,
+        0,
+        layout);
+}
+
+LayerTestResult<int16_t, 4> L2Normalization4dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization4dTestCommon<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f,
+        0,
+        layout);
+}
+
+LayerTestResult<uint8_t, 4> L2Normalization4dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    return L2Normalization4dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        1.f,
+        0,
+        1.f / 128,
+        128,
+        layout);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/L2NormalizationTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/L2NormalizationTestImpl.hpp
new file mode 100644 (file)
index 0000000..78c2ac1
--- /dev/null
@@ -0,0 +1,87 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <armnn/Types.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> L2NormalizationDefaultEpsilonTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> L2NormalizationNonDefaultEpsilonTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> L2Normalization1dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> L2Normalization1dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> L2Normalization1dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> L2Normalization2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> L2Normalization2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> L2Normalization2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 2> L2Normalization2dShapeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> L2Normalization3dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> L2Normalization3dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> L2Normalization3dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<float, 4> L2Normalization4dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<int16_t, 4> L2Normalization4dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
+LayerTestResult<uint8_t, 4> L2Normalization4dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
@@ -2,24 +2,27 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "QuantizeHelper.hpp"
-#include "WorkloadTestUtils.hpp"
+#include "LstmTestImpl.hpp"
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
-
-#include <test/TensorHelpers.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
 
-#include "reference/workloads/LstmUtils.hpp"
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <reference/workloads/Decoders.hpp>
+#include <reference/workloads/Encoders.hpp>
+#include <reference/workloads/LstmUtils.hpp>
 
-//LstmUtils Tests
-// TODO: Add tests for the remaining functions in LstmUtils.hpp
+#include <test/TensorHelpers.hpp>
+
+#include <boost/multi_array.hpp>
+
+namespace
+{
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 void LstmUtilsVectorBatchVectorAddTestImpl(
@@ -52,8 +55,8 @@ template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 void LstmUtilsZeroVectorTestImpl(
         boost::multi_array<float, 1>& input,
         uint32_t vSize,
-        boost::multi_array<float, 1>& expectedOutput) {
-
+        boost::multi_array<float, 1>& expectedOutput)
+{
     float qScale = 0.0f;
     int32_t qOffset = 0;
 
@@ -74,7 +77,6 @@ void LstmUtilsZeroVectorTestImpl(
 
 }
 
-
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 void LstmUtilsMeanStddevNormalizationTestImpl(
         boost::multi_array<float, 2>& input,
@@ -1270,7 +1272,6 @@ LayerTestResult<T, 2> LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(
     return ret3;
 }
 
-
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 2>
 LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadFactory& workloadFactory,
@@ -1539,18 +1540,14 @@ LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadF
     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
 
     return ret;
-
 }
 
-// QuantizedLstm tests:
-
-LayerTestResult<uint8_t, 2>
-QuantizedLstmTestImpl(armnn::IWorkloadFactory& workloadFactory,
-                      const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-                      const boost::multi_array<uint8_t, 2>& input,
-                      const boost::multi_array<uint8_t, 2>& outputExpected)
+LayerTestResult<uint8_t, 2> QuantizedLstmTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<uint8_t, 2>& input,
+    const boost::multi_array<uint8_t, 2>& outputExpected)
 {
-
     auto numBatches = boost::numeric_cast<unsigned int>(input.shape()[0]);
     auto inputSize = boost::numeric_cast<unsigned int>(input.shape()[1]);
     auto outputSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
@@ -1729,4 +1726,347 @@ QuantizedLstmTestImpl(armnn::IWorkloadFactory& workloadFactory,
     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
 
     return ret;
-}
\ No newline at end of file
+}
+
+} // anonymous namespace
+
+#if defined(ARMNNREF_ENABLED)
+
+// The LstmUtils unit tests are currently run only on the reference backend.
+
+void LstmUtilsZeroVectorTest()
+{
+    armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32);
+    boost::multi_array<float, 1> input = MakeTensor<float, 1>(inputDesc, std::vector<float>(
+            {2., 3., 3., 4.}));
+
+    boost::multi_array<float, 1> expectedOutput = MakeTensor<float, 1>(inputDesc, std::vector<float>(
+            {0., 0., 0., 0.}));
+
+    return LstmUtilsZeroVectorTestImpl<armnn::DataType::Float32>(input, 4, expectedOutput);
+}
+
+void LstmUtilsMeanStddevNormalizationNoneZeroInputTest()
+{
+    uint32_t batchSize = 2;
+    uint32_t vecSize = 4;
+    armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            { 0.1f, 0.2f, 0.3f, 0.4f,      //batch 0
+              0.9f, 1.0f, 1.1f, 1.2f }));  //batch 1
+
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            { -1.34164071f, -0.447213531f, 0.44721365f,  1.34164071f,      //batch 0
+              -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f  }));  //batch 1
+
+    return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
+            vecSize, batchSize, expectedOutput);
+}
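
The expected values here can be reproduced by hand: each batch row is normalised to zero mean and unit standard deviation. A minimal reference computation for batch 0 (an illustration of the maths, not the ArmNN kernel):

    #include <cmath>

    // row = { 0.1, 0.2, 0.3, 0.4 }: mean = 0.25 and
    // variance = (0.15^2 + 0.05^2 + 0.05^2 + 0.15^2) / 4 = 0.0125.
    float NormaliseFirstElement()
    {
        return (0.1f - 0.25f) / std::sqrt(0.0125f); // ~ -1.34164071f
    }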
+
+void LstmUtilsMeanStddevNormalizationAllZeroInputTest()
+{
+    uint32_t batchSize = 2;
+    uint32_t vecSize = 4;
+    armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
+              0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
+
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
+              0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
+
+    return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
+            vecSize, batchSize, expectedOutput);
+}
+
+void LstmUtilsMeanStddevNormalizationMixedZeroInputTest()
+{
+    uint32_t batchSize = 2;
+    uint32_t vecSize = 4;
+    armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
+              0.1f, 0.2f, 0.3f, 0.4f }));  //batch 1
+
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            {         0.0f,          0.0f,        0.0f,        0.0f,      //batch 0
+              -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f }));  //batch 1
+
+    return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
+            vecSize, batchSize, expectedOutput);
+}
+
+void LstmUtilsVectorBatchVectorCwiseProductTest()
+{
+    uint32_t batchSize = 4;
+    uint32_t vecSize = 29;
+    armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
+            {   1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f, 10.1f,
+              11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f,
+              21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,     0.0f}));
+
+    armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+            { /* batch 0 */
+                1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f,  10.1f,
+              11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f,  20.2f,
+              21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,   0.0f,
+              /* batch 1 */
+                -1.1f,   -2.2f,   -3.3f,   -4.4f,   -5.5f,   -6.6f,   -7.7f,   -8.8f,   -9.9f, -10.1f,
+              -11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f,
+              -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f,    0.0f,
+              /* batch 2 */
+                1.1f,   -2.2f,   3.3f,   -4.4f,   5.5f,   -6.6f,   7.7f,   -8.8f,   9.9f, -10.1f,
+              11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f,
+              21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f,   0.0f,
+              /* batch 3 */
+                -1.1f,   2.2f,   -3.3f,   4.4f,   -5.5f,   6.6f,   -7.7f,   8.8f,   -9.9f, 10.1f,
+              -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f,
+              -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f,    0.0f}));
+
+    // Expected output = vector * batchVector, element-wise.
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+            { /* batch 0 */
+                 1.210000f,    4.840000f,   10.889999f,   19.360001f,   30.250000f,   43.559998f,
+                59.289997f,   77.440002f,   98.009995f,  102.010010f,  123.432091f,  146.894394f,
+               172.396896f,  199.939606f,  229.522491f,  261.145599f,  294.808899f,  330.512421f,
+               368.256134f,  408.040039f,  449.864075f,  493.728363f,  539.632874f,  587.577576f,
+               637.562500f,  689.587585f,  743.652954f,  799.758423f,    0.000000f,
+              /* batch 1 */
+                -1.210000f,   -4.840000f,  -10.889999f,  -19.360001f,  -30.250000f,  -43.559998f,
+               -59.289997f,  -77.440002f,  -98.009995f, -102.010010f, -123.432091f, -146.894394f,
+              -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f,
+              -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f,
+              -637.562500f, -689.587585f, -743.652954f, -799.758423f,    0.000000f,
+              /* batch 2 */
+                 1.210000f,   -4.840000f,  10.889999f,   -19.360001f,   30.250000f,  -43.559998f,
+                59.289997f,  -77.440002f,  98.009995f,  -102.010010f,  123.432091f, -146.894394f,
+               172.396896f, -199.939606f, 229.522491f,  -261.145599f,  294.808899f, -330.512421f,
+               368.256134f, -408.040039f, 449.864075f,  -493.728363f,  539.632874f, -587.577576f,
+               637.562500f, -689.587585f, 743.652954f,  -799.758423f,    0.000000f,
+              /* batch 3 */
+                -1.210000f,    4.840000f,  -10.889999f,   19.360001f,  -30.250000f,   43.559998f,
+               -59.289997f,   77.440002f,  -98.009995f,  102.010010f, -123.432091f,  146.894394f,
+              -172.396896f,  199.939606f, -229.522491f,  261.145599f, -294.808899f,  330.512421f,
+              -368.256134f,  408.040039f, -449.864075f,  493.728363f, -539.632874f,  587.577576f,
+              -637.562500f,  689.587585f, -743.652954f,  799.758423f,    0.000000f}));
+
+    return LstmUtilsVectorBatchVectorCwiseProductTestImpl<armnn::DataType::Float32>(vector, batchVector,
+            vecSize, batchSize, expectedOutput);
+}
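
The first expected entry is easy to sanity-check: batch 0 of batchVector equals vector itself, so the element-wise product of 1.1f with itself gives the 1.210000f above.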
+
+void LstmUtilsVectorBatchVectorAddTest()
+{
+    uint32_t batchSize = 2;
+    uint32_t vecSize = 3;
+    armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
+            { 0.0f, -0.5f, 1.0f}));
+
+    armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+            { 1.0f, 2.0f, 3.0f,    //batch 0
+              4.0f, 5.0f, 6.0f})); //batch 1
+
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
+            { 1.0f, 1.5f, 4.0f,
+              4.0f, 4.5f, 7.0f}));
+
+    return LstmUtilsVectorBatchVectorAddTestImpl<armnn::DataType::Float32>(vector, batchVector,
+            vecSize, batchSize, expectedOutput);
+}
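
This broadcast add can be verified by eye: vector { 0.0f, -0.5f, 1.0f } is added to each batch row, so batch 0 { 1, 2, 3 } becomes { 1.0f, 1.5f, 4.0f } and batch 1 { 4, 5, 6 } becomes { 4.0f, 4.5f, 7.0f }.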
+
+#endif
+
+LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({ 2, 2 }, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            { 2., 3., 3., 4. }));
+
+    armnn::TensorInfo outputDesc({ 2, 4 }, armnn::DataType::Float32);
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
+            {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
+             -0.42734814f, -0.00478661f,  0.13455015f, -0.03560682f}));
+    return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, input, expectedOutput);
+}
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
+             0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}));
+
+    armnn::TensorInfo outputDesc({ 2, 16 }, armnn::DataType::Float32);
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
+            {-0.00396806f, 0.029352f,     -0.00279226f, 0.0159977f,   -0.00835576f,
+             -0.0211779f,  0.0283512f,    -0.0114597f,  0.00907307f,  -0.0244004f,
+             -0.0152191f,  -0.0259063f,   0.00914318f,  0.00415118f,  0.017147f,
+             0.0134203f, -0.013869f,    0.0287268f,   -0.00334693f, 0.00733398f,  -0.0287926f,
+             -0.0186926f,   0.0193662f,   -0.0115437f,  0.00422612f,  -0.0345232f,
+             0.00223253f,   -0.00957321f, 0.0210624f,   0.013331f,    0.0150954f,
+             0.02168f}));
+    return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, input, expectedOutput);
+}
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            {2., 3., 3., 4.}));
+
+    armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::Float32);
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
+            {{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
+              -0.0185422f,   0.11281417f,  0.24466537f, -0.1826292f}}));
+
+    return LstmNoCifgNoPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, input, expectedOutput);
+}
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
+    boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
+            {0.7f, 0.8f, 0.1f, 0.2f, 0.3f,     //batch 0
+             0.3f, 0.2f, 0.9f, 0.8f, 0.1f}));  //batch 1
+
+    armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32);
+    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
+            {  0.0244077f,  0.128027f, -0.00170918f,    //batch 0
+             -0.00692428f, 0.0848741f,    0.063445f})); //batch 1
+    return LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<armnn::DataType::Float32>(
+            workloadFactory, memoryManager, input, expectedOutput);
+}
+
+LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const float qScale = 1.0f;
+    const int32_t qOffset = 0;
+
+    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
+    const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
+
+    armnn::TensorInfo inputDesc({2, 2}, datatype);
+    boost::multi_array<int16_t , 2> input = MakeTensor<int16_t , 2>(inputDesc, QuantizedVector<int16_t>(qScale, qOffset,
+            std::vector<float>{2., 3., 3., 4.}));
+
+    armnn::TensorInfo outputDesc({2, 4}, datatype);
+    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
+            qOffset, std::vector<float>({{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
+                                          -0.0185422f,  0.11281417f,  0.24466537f, -0.1826292f}})));
+
+    return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
+        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
+
+}
+
+LayerTestResult<int16_t, 2> LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const float qScale = 1.0f;
+    const int32_t qOffset = 0;
+
+    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
+    const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
+
+    armnn::TensorInfo inputDesc({ 2, 2 }, datatype);
+    boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale, qOffset,
+            std::vector<float>({ 2., 3., 3., 4. })));
+
+    armnn::TensorInfo outputDesc({ 2, 4 }, datatype);
+    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
+            qOffset, std::vector<float>(
+            {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
+             -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f})));
+
+    return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<datatype>(
+        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
+}
+
+LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const float qScale = 2.0f;
+    const int32_t qOffset = 0;
+
+    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
+    const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
+
+    armnn::TensorInfo inputDesc({ 2, 5 }, datatype);
+    boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale,
+            qOffset, std::vector<float>(
+            {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
+             0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f})));
+
+    armnn::TensorInfo outputDesc({ 2, 16 }, datatype);
+    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
+            qOffset, std::vector<float>(
+            {-0.00396806f,  0.029352f,   -0.00279226f, 0.0159977f,  -0.00835576f,
+             -0.0211779f,   0.0283512f,  -0.0114597f,  0.00907307f, -0.0244004f,
+             -0.0152191f,  -0.0259063f,   0.00914318f, 0.00415118f,  0.017147f,
+              0.0134203f,  -0.013869f,    0.0287268f, -0.00334693f,  0.00733398f, -0.0287926f,
+             -0.0186926f,   0.0193662f,  -0.0115437f,  0.00422612f, -0.0345232f,
+              0.00223253f, -0.00957321f,  0.0210624f,  0.013331f,    0.0150954f,   0.02168f})));
+
+    return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<datatype>(
+        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
+}
+
+LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const float qScale = 1.0f;
+    const int32_t qOffset = 0;
+
+    const armnn::DataType datatype = armnn::DataType::QuantisedSymm16; // datatype & constants set to QSymm16
+
+    armnn::TensorInfo inputDesc({2, 2}, datatype);
+    boost::multi_array<int16_t , 2> input = MakeTensor<int16_t , 2>(inputDesc, QuantizedVector<int16_t>(qScale,
+            qOffset, std::vector<float>{2., 3., 3., 4.}));
+
+    armnn::TensorInfo outputDesc({2, 4}, datatype);
+    boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
+            qOffset, std::vector<float>({{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
+                                          -0.0185422f,  0.11281417f,  0.24466537f, -0.1826292f}})));
+
+    return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
+        workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, datatype);
+}
+
+//
+// QuantizedLstm
+//
+
+LayerTestResult<uint8_t, 2> QuantizedLstmTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> input = MakeTensor<uint8_t, 2>(inputDesc, std::vector<uint8_t>(
+        {166, 179, 50, 150}));
+
+    armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> expectedOutput = MakeTensor<uint8_t, 2>(outputDesc, std::vector<uint8_t>(
+        {140, 151, 146, 112, 136, 156, 142, 112 }));
+
+    return QuantizedLstmTestImpl(workloadFactory, memoryManager, input, expectedOutput);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
new file mode 100644 (file)
index 0000000..2779009
--- /dev/null
@@ -0,0 +1,60 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#if defined(ARMNNREF_ENABLED)
+void LstmUtilsZeroVectorTest();
+void LstmUtilsMeanStddevNormalizationNoneZeroInputTest();
+void LstmUtilsMeanStddevNormalizationAllZeroInputTest();
+void LstmUtilsMeanStddevNormalizationMixedZeroInputTest();
+void LstmUtilsVectorBatchVectorCwiseProductTest();
+void LstmUtilsVectorBatchVectorAddTest();
+#endif
+
+LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+//
+// QuantizedLstm
+//
+
+LayerTestResult<uint8_t, 2> QuantizedLstmTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/MeanTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/MeanTestImpl.hpp
new file mode 100644 (file)
index 0000000..d0bdfa4
--- /dev/null
@@ -0,0 +1,178 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T, std::size_t InputDim, std::size_t OutputDim>
+LayerTestResult<T, OutputDim> MeanTestHelper(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const unsigned int* inputShape,
+        const std::vector<float>& inputData,
+        const std::vector<unsigned int>& axis,
+        bool keepDims,
+        const unsigned int* outputShape,
+        const std::vector<float>& outputData,
+        float scale = 1.0f,
+        int32_t offset = 0)
+{
+    armnn::TensorInfo inputTensorInfo(InputDim, inputShape, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, ArmnnType);
+
+    inputTensorInfo.SetQuantizationScale(scale);
+    inputTensorInfo.SetQuantizationOffset(offset);
+
+    outputTensorInfo.SetQuantizationScale(scale);
+    outputTensorInfo.SetQuantizationOffset(offset);
+
+    auto input = MakeTensor<T, InputDim>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
+
+    LayerTestResult<T, OutputDim> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, OutputDim>(
+            outputTensorInfo, ConvertToDataType<ArmnnType>(outputData, outputTensorInfo));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::MeanQueueDescriptor data;
+    data.m_Parameters.m_Axis = axis;
+    data.m_Parameters.m_KeepDims = keepDims;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data,  info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMean(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+
+} // anonymous namespace
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 1> MeanSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 3, 2 };
+    const unsigned int outputShape[] = { 1 };
+
+    std::vector<float> input({ 1.5f, 1.5f, 2.5f, 2.5f, 3.5f, 3.5f });
+    std::vector<float> output({ 2.5f });
+
+    return MeanTestHelper<ArmnnType, T, 2, 1>(
+            workloadFactory, memoryManager, inputShape, input, {}, false, outputShape, output);
+}
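
With an empty axis list and keepDims set to false, MeanTestHelper reduces over every dimension, so the expected output is the grand mean: (1.5 + 1.5 + 2.5 + 2.5 + 3.5 + 3.5) / 6 = 2.5.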
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> MeanSimpleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 2, 3, 1, 2 };
+    const unsigned int outputShape[] = { 3, 1, 2 };
+
+    std::vector<float> input({ 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f });
+    std::vector<float> output({ 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f });
+
+    return MeanTestHelper<ArmnnType, T, 4, 3>(
+            workloadFactory, memoryManager, inputShape, input, { 0 }, false, outputShape, output);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> MeanKeepDimsTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 1, 1, 3, 2 };
+    const unsigned int outputShape[] = { 1, 1, 1, 2 };
+
+    std::vector<float> input({ 1.5f, 1.5f, 2.5f, 2.5f, 3.5f, 3.5f });
+    std::vector<float> output({ 2.5f, 2.5f });
+
+    return MeanTestHelper<ArmnnType, T, 4, 4>(
+            workloadFactory, memoryManager, inputShape, input, { 2 }, true, outputShape, output);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> MeanMultipleDimsTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 2, 3, 1, 2 };
+    const unsigned int outputShape[] = { 1, 3, 1, 1 };
+
+    std::vector<float> input({ 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f });
+    std::vector<float> output({ 2.0f, 4.0f, 6.0f });
+
+    return MeanTestHelper<ArmnnType, T, 4, 4>(
+            workloadFactory, memoryManager, inputShape, input, { 0, 3 }, true, outputShape, output);
+}
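
A worked check for the multiple-axis case: reducing axes { 0, 3 } of the { 2, 3, 1, 2 } input averages, for each of the three retained entries, the four values spanning the batch and innermost dimensions, e.g. (1.5 + 2.5 + 1.5 + 2.5) / 4 = 2.0 for the first output element.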
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 1> MeanVts1Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 4, 3, 2 };
+    const unsigned int outputShape[] = { 2 };
+
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+                               15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
+    std::vector<float> output({ 12.0f, 13.0f });
+
+    return MeanTestHelper<ArmnnType, T, 3, 1>(
+            workloadFactory, memoryManager, inputShape, input, { 0, 1 }, false, outputShape, output);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> MeanVts2Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 4, 3, 2 };
+    const unsigned int outputShape[] = { 1, 3, 1 };
+
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+                               15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
+    std::vector<float> output({ 10.5f, 12.5f, 14.5f });
+
+    return MeanTestHelper<ArmnnType, T, 3, 3>(
+            workloadFactory, memoryManager, inputShape, input, { 0, 2 }, true, outputShape, output);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> MeanVts3Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputShape[] = { 1, 2, 2, 1 };
+    const unsigned int outputShape[] = { 1, 2, 1 };
+
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f });
+    std::vector<float> output({ 1.5f, 3.5f });
+
+    return MeanTestHelper<ArmnnType, T, 4, 3>(
+            workloadFactory, memoryManager, inputShape, input, { 2 }, false, outputShape, output);
+}
@@ -3,15 +3,20 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include "WorkloadTestUtils.hpp"
+#include "NormalizationTestImpl.hpp"
 
 #include <armnn/Exceptions.hpp>
 #include <armnn/LayerSupport.hpp>
-#include <armnn/Types.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
 
 LayerTestResult<float,4> SimpleNormalizationTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
@@ -347,3 +352,41 @@ LayerTestResult<float,4> CompareNormalizationTestImpl(
     return ret;
 }
 
+} // anonymous namespace
+
+LayerTestResult<float,4> SimpleNormalizationAcrossTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
+    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
+    return SimpleNormalizationTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
+}
+
+LayerTestResult<float,4> SimpleNormalizationWithinTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
+    auto normChannel = armnn::NormalizationAlgorithmChannel::Within;
+    return SimpleNormalizationTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
+}
+
+LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
+    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
+    return SimpleNormalizationNhwcTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
+}
+
+LayerTestResult<float,4> CompareNormalizationTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::NormalizationAlgorithmChannel normChannel,
+    armnn::NormalizationAlgorithmMethod normMethod)
+{
+    return CompareNormalizationTestImpl(workloadFactory, memoryManager, refWorkloadFactory, normChannel, normMethod);
+}
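
The two channel modes reuse the same implementation and differ only in the NormalizationAlgorithmChannel passed down: Across sums the local-brightness neighbourhood across channels, while Within keeps the summation inside each channel. Which mode is appropriate depends on the network being reproduced, which is why both are exercised.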
diff --git a/src/backends/backendsCommon/test/layerTests/NormalizationTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/NormalizationTestImpl.hpp
new file mode 100644 (file)
index 0000000..be66f6c
--- /dev/null
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <armnn/Types.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> SimpleNormalizationAcrossTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SimpleNormalizationWithinTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> CompareNormalizationTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::NormalizationAlgorithmChannel normChannel,
+    armnn::NormalizationAlgorithmMethod normMethod);
diff --git a/src/backends/backendsCommon/test/layerTests/PadTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/PadTestImpl.cpp
new file mode 100644 (file)
index 0000000..82b772e
--- /dev/null
@@ -0,0 +1,497 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PadTestImpl.hpp"
+
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+//
+// Implementation templates
+//
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> Pad2dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    const float customPaddingValue)
+{
+    const armnn::TensorShape inputShape{ 3, 3 };
+    const armnn::TensorShape outputShape{ 7, 7 };
+
+    const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
+    const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputValues(
+      QuantizedVector<T>(qScale, qOffset,
+    {
+        // Height (3) x Width (3)
+        4, 8, 6,
+        7, 4, 4,
+        3, 2, 4
+    }));
+
+    const float p = customPaddingValue;
+    std::vector<T> expectedOutputValues(
+      QuantizedVector<T>(qScale, qOffset,
+    {
+        p, p, p, p, p, p, p,
+        p, p, p, p, p, p, p,
+        p, p, 4, 8, 6, p, p,
+        p, p, 7, 4, 4, p, p,
+        p, p, 3, 2, 4, p, p,
+        p, p, p, p, p, p, p,
+        p, p, p, p, p, p, p
+    }));
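+
+    // QuantizedVector maps each float to a value of type T (roughly
+    // round(v / qScale) + qOffset for the quantized data types, a plain copy
+    // for float), so the same literals drive every instantiation of this test.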
+
+    auto inputTensor = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>(inputValues));
+
+    LayerTestResult<T, 2> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>(expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PadQueueDescriptor descriptor;
+
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
+    padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
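+    // Each (before, after) pair pads one dimension: output extent =
+    // before + input + after, i.e. 2 + 3 + 2 = 7 for both height and width,
+    // matching outputShape{ 7, 7 }.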
+
+    descriptor.m_Parameters.m_PadList = padList;
+    descriptor.m_Parameters.m_PadValue = customPaddingValue;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> Pad3dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    const armnn::TensorShape inputShape{ 2, 2, 2 };
+    const armnn::TensorShape outputShape{ 3, 5, 6 };
+
+    const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
+    const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputValues(
+      QuantizedVector<T>(qScale, qOffset,
+    {
+        // Channel 0, Height (2) x Width (2)
+        0, 4,
+        2, 5,
+
+        // Channel 1, Height (2) x Width (2)
+        6, 1,
+        5, 2
+    }));
+
+    std::vector<T> expectedOutputValues(
+      QuantizedVector<T>(qScale, qOffset,
+    {
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 4, 0, 0,
+        0, 0, 2, 5, 0, 0,
+        0, 0, 0, 0, 0, 0,
+
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0,
+        0, 0, 6, 1, 0, 0,
+        0, 0, 5, 2, 0, 0,
+        0, 0, 0, 0, 0, 0,
+
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0
+    }));
+
+    auto inputTensor = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>(inputValues));
+
+    LayerTestResult<T, 3> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PadQueueDescriptor descriptor;
+
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    padList.push_back(std::pair<unsigned int, unsigned int>(0,1));
+    padList.push_back(std::pair<unsigned int, unsigned int>(2,1));
+    padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
+
+    descriptor.m_Parameters.m_PadList = padList;
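+    // Output extents per dimension (before + input + after):
+    // 0 + 2 + 1 = 3, 2 + 2 + 1 = 5, 2 + 2 + 2 = 6, matching outputShape{ 3, 5, 6 }.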
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> Pad4dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset)
+{
+    const armnn::TensorShape inputShape{ 2, 2, 3, 2 };
+    const armnn::TensorShape outputShape{ 4, 5, 7, 4 };
+
+    const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
+    const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputValues(
+      QuantizedVector<T>(qScale, qOffset,
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+        0, 1,
+        2, 3,
+        4, 5,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+        6, 7,
+        8, 9,
+        10, 11,
+
+        // Batch 1, Channel 0, Height (3) x Width (2)
+        12, 13,
+        14, 15,
+        16, 17,
+
+        // Batch 1, Channel 1, Height (3) x Width (2)
+        18, 19,
+        20, 21,
+        22, 23
+    }));
+
+    std::vector<T> expectedOutputValues(
+      QuantizedVector<T>(qScale, qOffset,
+    {
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 1, 0,
+        0, 2, 3, 0,
+        0, 4, 5, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 6, 7, 0,
+        0, 8, 9, 0,
+        0, 10, 11, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 12, 13, 0,
+        0, 14, 15, 0,
+        0, 16, 17, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 18, 19, 0,
+        0, 20, 21, 0,
+        0, 22, 23, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0
+    }));
+
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(inputValues));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PadQueueDescriptor descriptor;
+
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    padList.push_back(std::pair<unsigned int, unsigned int>(1,1));
+    padList.push_back(std::pair<unsigned int, unsigned int>(2,1));
+    padList.push_back(std::pair<unsigned int, unsigned int>(3,1));
+    padList.push_back(std::pair<unsigned int, unsigned int>(1,1));
+
+    descriptor.m_Parameters.m_PadList = padList;
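+    // Output extents per dimension (before + input + after):
+    // 1 + 2 + 1 = 4, 2 + 2 + 1 = 5, 3 + 3 + 1 = 7, 1 + 2 + 1 = 4,
+    // matching outputShape{ 4, 5, 7, 4 }.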
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
+
+//
+// Explicit template specializations
+//
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 2>
+Pad2dTestCommon<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    const float customPaddingValue);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 3>
+Pad3dTestCommon<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+Pad4dTestCommon<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset);
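+
+// Note: now that these templates are defined in a .cpp file rather than a
+// header, any instantiation referenced only from other translation units
+// (QuantisedSymm16 above, presumably used by backend-specific tests) must be
+// instantiated explicitly; the Float32 and QuantisedAsymm8 instantiations are
+// produced implicitly by the wrapper functions below.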
+
+//
+// Implementation functions
+//
+
+LayerTestResult<uint8_t, 2> PadUint82dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<uint8_t, 2> PadUint82dCustomPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0, 1.0f);
+}
+
+LayerTestResult<uint8_t, 3> PadUint83dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad3dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<uint8_t, 4> PadUint84dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad4dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<float, 2> PadFloat322dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 2> PadFloat322dCustomPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0, 1.0f);
+}
+
+LayerTestResult<float, 3> PadFloat323dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad3dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<float, 4> PadFloat324dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return Pad4dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
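+
+// A direct-invocation sketch (assuming armnn::RefWorkloadFactory, the
+// reference backend's factory; the null memory manager is illustrative):
+//
+//     armnn::RefWorkloadFactory factory;
+//     LayerTestResult<float, 2> res = PadFloat322dTest(factory, nullptr);
+//     // The calling harness then compares res.output against res.outputExpected.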
diff --git a/src/backends/backendsCommon/test/layerTests/PadTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/PadTestImpl.hpp
new file mode 100644 (file)
index 0000000..156b861
--- /dev/null
@@ -0,0 +1,69 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/Types.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Pad2dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset,
+    const float customPaddingValue = 0.0f);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Pad3dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Pad4dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float qScale,
+    int32_t qOffset);
+
+LayerTestResult<uint8_t, 2> PadUint82dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> PadUint82dCustomPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> PadUint83dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PadUint84dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> PadFloat322dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> PadFloat322dCustomPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> PadFloat323dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> PadFloat324dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -2,21 +2,21 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
 #pragma once
 
-#include "QuantizeHelper.hpp"
-#include "WorkloadTestUtils.hpp"
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
-
-#include <test/TensorHelpers.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/IBackendInternal.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
 
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
 template<typename T>
 LayerTestResult<T, 4> SimplePermuteTestImpl(
         armnn::IWorkloadFactory& workloadFactory,
@@ -2,30 +2,28 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
-#include "TensorUtils.hpp"
+#include "Pooling2dTestImpl.hpp"
 
-#include "QuantizeHelper.hpp"
-
-#include <armnn/ArmNN.hpp>
+#include <armnn/LayerSupport.hpp>
 
+#include <DataLayoutIndexed.hpp>
 #include <Permute.hpp>
+#include <ResolveType.hpp>
+#include <TensorUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
 #include <backendsCommon/WorkloadInfo.hpp>
 
-#include <test/TensorHelpers.hpp>
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
-#include <DataLayoutIndexed.hpp>
+#include <test/TensorHelpers.hpp>
 
 #include <boost/numeric/conversion/cast.hpp>
 
-#include <algorithm>
-#include <string>
+namespace
+{
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 4> SimplePooling2dTestImpl(
@@ -1312,3 +1310,475 @@ LayerTestResult<T, 4> IgnorePaddingL2Pooling2dSize3TestCommon(
     return SimplePooling2dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, descriptor, qScale, qOffset, input, outputExpected);
 }
+
+} // anonymous namespace
+
+LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, forceNoPadding);
+}
+
+LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, forceNoPadding, 3.0f, -5);
+}
+
+LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager, forceNoPadding);
+}
+
+LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, forceNoPadding);
+}
+
+LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, forceNoPadding, 0.1f, 128);
+}
+
+LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager, forceNoPadding);
+}
+
+LayerTestResult<float, 4> SimpleMaxPooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleMaxPooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<uint8_t, 4> SimpleMaxPooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<int16_t, 4> SimpleMaxPooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
+            workloadFactory, memoryManager, 1.0f, -5);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleMaxPooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(
+            workloadFactory, memoryManager, 1.0f, -5);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingMaxPooling2dSize3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SimpleAveragePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, dataLayout, 0.5, -1);
+}
+
+LayerTestResult<int16_t, 4> SimpleAveragePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding)
+{
+    return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, forceNoPadding);
+}
+
+LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, 0.5, -1);
+}
+
+LayerTestResult<int16_t, 4> LargeTensorsAveragePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::Float32>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::QuantisedAsymm8>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingAveragePooling2dSize3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SimpleL2Pooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleL2Pooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<int16_t, 4> SimpleL2Pooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    return SimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, dataLayout);
+}
+
+LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride1Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride4Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> L2Pooling2dSize7Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize7TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize7TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize7Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize7TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> L2Pooling2dSize9Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize9TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize9TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize9Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return L2Pooling2dSize9TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleL2Pooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> IgnorePaddingL2Pooling2dSize3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> AsymmetricNonSquarePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> ComparePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::PoolingAlgorithm poolingType)
+{
+    return ComparePooling2dTestCommon<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, refWorkloadFactory, poolingType);
+}
+
+LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::PoolingAlgorithm poolingType)
+{
+    return ComparePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, refWorkloadFactory, poolingType, 0.1f, 128);
+}
+
+LayerTestResult<int16_t, 4> ComparePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::PoolingAlgorithm poolingType)
+{
+    return ComparePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
+            workloadFactory, memoryManager, refWorkloadFactory, poolingType);
+}
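+
+// The comparison entry points run the same pooling on the backend under test
+// and on a reference factory; a usage sketch (assuming armnn::RefWorkloadFactory
+// as the reference implementation):
+//
+//     armnn::RefWorkloadFactory refFactory;
+//     auto res = ComparePooling2dTest(backendFactory, nullptr, refFactory,
+//                                     armnn::PoolingAlgorithm::Average);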
diff --git a/src/backends/backendsCommon/test/layerTests/Pooling2dTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/Pooling2dTestImpl.hpp
new file mode 100644 (file)
index 0000000..6f7a595
--- /dev/null
@@ -0,0 +1,279 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <armnn/Types.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float,   4> SimpleMaxPooling2dSize2x2Stride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<float,   4> SimpleMaxPooling2dSize3x3Stride2x4Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<float,   4> SimpleMaxPooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<uint8_t, 4> SimpleMaxPooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<int16_t, 4> SimpleMaxPooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<float,   4> IgnorePaddingSimpleMaxPooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleMaxPooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> IgnorePaddingMaxPooling2dSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingMaxPooling2dSize3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> SimpleAveragePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<int16_t, 4> SimpleAveragePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<float,   4> LargeTensorsAveragePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> LargeTensorsAveragePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    bool forceNoPadding);
+
+LayerTestResult<float,   4> IgnorePaddingSimpleAveragePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> IgnorePaddingAveragePooling2dSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingAveragePooling2dSize3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> SimpleL2Pooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<int16_t, 4> SimpleL2Pooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<float,   4> L2Pooling2dSize3Stride1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride1Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> L2Pooling2dSize3Stride3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> L2Pooling2dSize3Stride4Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride4Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> L2Pooling2dSize7Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize7Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> L2Pooling2dSize9Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> L2Pooling2dSize9Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> IgnorePaddingSimpleL2Pooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingSimpleL2Pooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> IgnorePaddingL2Pooling2dSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> IgnorePaddingL2Pooling2dSize3Int16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float,   4> AsymmetricNonSquarePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> AsymmetricNonSquarePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> ComparePooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::PoolingAlgorithm poolingType);
+
+LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::PoolingAlgorithm poolingType);
+
+LayerTestResult<int16_t, 4> ComparePooling2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    armnn::PoolingAlgorithm poolingType);
diff --git a/src/backends/backendsCommon/test/layerTests/PreluTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/PreluTestImpl.hpp
new file mode 100644 (file)
index 0000000..18a5bd0
--- /dev/null
@@ -0,0 +1,97 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> PreluTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo ({ 1, 2, 2, 3 }, ArmnnType);
+    armnn::TensorInfo alphaTensorInfo ({ 1, 1, 1, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 1, 2, 2, 3 }, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.25f);
+        inputTensorInfo.SetQuantizationOffset(128);
+        alphaTensorInfo.SetQuantizationScale(0.25f);
+        alphaTensorInfo.SetQuantizationOffset(50);
+        outputTensorInfo.SetQuantizationScale(0.5f);
+        outputTensorInfo.SetQuantizationOffset(120);
+    }
+
+    std::vector<float> inputData
+    {
+        // Expected quantized values:
+        // 128, 128, 128, 132, 132, 132, 124, 124, 124, 120, 120, 120
+        0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -2.0f, -2.0f, -2.0f
+    };
+    std::vector<float> alphaData
+    {
+        // Expected quantized values:
+        // 50, 54, 58
+        0.0f, 1.0f, 2.0f
+    };
+    std::vector<float> outputExpectedData =
+    {
+        // Expected quantized values:
+        // 120, 120, 120, 122, 122, 122, 120, 118, 116, 120, 116, 112
+        0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, -1.0f, -2.0f, 0.0f, -2.0f, -4.0f
+    };
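+
+    // PReLU computes f(x) = x for x >= 0 and f(x) = alpha * x otherwise, with
+    // alpha broadcast from shape { 1, 1, 1, 3 } across the batch and spatial
+    // dimensions: e.g. input -2.0f against the per-channel alphas { 0, 1, 2 }
+    // yields { 0.0f, -2.0f, -4.0f }, the last triple above.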
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+    auto alpha = MakeTensor<T, 4>(alphaTensorInfo, QuantizedVector<T>(alphaTensorInfo.GetQuantizationScale(),
+                                                                      alphaTensorInfo.GetQuantizationOffset(),
+                                                                      alphaData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputExpectedData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> alphaHandle  = workloadFactory.CreateTensorHandle(alphaTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PreluQueueDescriptor descriptor;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload (descriptor, info, inputTensorInfo,  inputHandle.get());
+    AddInputToWorkload (descriptor, info, alphaTensorInfo,  alphaHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePrelu(descriptor, info);
+
+    inputHandle->Allocate();
+    alphaHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+    CopyDataToITensorHandle(alphaHandle.get(), &alpha[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
diff --git a/src/backends/backendsCommon/test/QuantizeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.cpp
@@ -2,20 +2,20 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
+#include "QuantizeTestImpl.hpp"
 
-#include <test/TensorHelpers.hpp>
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/IBackendInternal.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
 
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
 
 namespace
 {
@@ -124,3 +124,24 @@ LayerTestResult<T, 4> QuantizeClampTest(
 }
 
 } // anonymous namespace
+
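+// These non-template wrappers pin the concrete types: armnn::ResolveType maps
+// QuantisedAsymm8 to uint8_t and QuantisedSymm16 to int16_t, matching the
+// declarations in QuantizeTestImpl.hpp.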
+LayerTestResult<uint8_t, 4> QuantizeSimpleUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return QuantizeSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return QuantizeClampTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return QuantizeClampTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/QuantizeTestImpl.hpp
new file mode 100644 (file)
index 0000000..bac438e
--- /dev/null
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<uint8_t, 4> QuantizeSimpleUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/ReshapeTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReshapeTestImpl.cpp
new file mode 100644 (file)
index 0000000..bce24f0
--- /dev/null
@@ -0,0 +1,201 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReshapeTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<typename T, size_t NumDims>
+LayerTestResult<T, NumDims> SimpleReshapeTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::TensorInfo inputTensorInfo,
+    armnn::TensorInfo outputTensorInfo,
+    const std::vector<T>& inputData,
+    const std::vector<T>& outputExpectedData)
+{
+    auto input = MakeTensor<T, NumDims>(inputTensorInfo, inputData);
+
+    LayerTestResult<T, NumDims> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<T, NumDims>(outputTensorInfo, outputExpectedData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ReshapeQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReshape(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
+
+    return ret;
+}
+
+} // anonymous namespace
+
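+// Reshape only rewrites the tensor metadata: the flat, row-major element order is
+// unchanged, so element i of the input buffer simply becomes element i of the
+// output buffer, whatever the two shapes are (as long as the element counts match).
+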
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> SimpleReshapeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = { 2, 2, 3, 3 };
+    unsigned int outputShape[] = { 2, 2, 9, 1 };
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(1.0f);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(1.0f);
+
+    auto input = ConvertToDataType<ArmnnType>(
+        {
+            0.0f, 1.0f, 2.0f,
+            3.0f, 4.0f, 5.0f,
+            6.0f, 7.0f, 8.0f,
+
+            9.0f, 10.0f, 11.0f,
+            12.0f, 13.0f, 14.0f,
+            15.0f, 16.0f, 17.0f,
+
+            18.0f, 19.0f, 20.0f,
+            21.0f, 22.0f, 23.0f,
+            24.0f, 25.0f, 26.0f,
+
+            27.0f, 28.0f, 29.0f,
+            30.0f, 31.0f, 32.0f,
+            33.0f, 34.0f, 35.0f,
+        },
+        inputTensorInfo);
+
+    auto outputExpected = ConvertToDataType<ArmnnType>(
+        {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+
+            18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f,
+
+            27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f,
+        },
+        outputTensorInfo);
+
+    return SimpleReshapeTestImpl<T, 4>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 5> Reshape5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = { 2, 2, 8, 1, 1 };
+    unsigned int outputShape[] = { 2, 2, 2, 2, 2 };
+
+    inputTensorInfo = armnn::TensorInfo(5, inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(1.0f);
+    outputTensorInfo = armnn::TensorInfo(5, outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(1.0f);
+
+    auto input = ConvertToDataType<ArmnnType>(
+        {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+            8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+
+            16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f,
+            24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
+        },
+        inputTensorInfo);
+
+    auto outputExpected = ConvertToDataType<ArmnnType>(
+        {
+            0.0f, 1.0f,
+            2.0f, 3.0f,
+
+            4.0f, 5.0f,
+            6.0f, 7.0f,
+
+
+            8.0f, 9.0f,
+            10.0f, 11.0f,
+
+            12.0f, 13.0f,
+            14.0f, 15.0f,
+
+
+
+            16.0f, 17.0f,
+            18.0f, 19.0f,
+
+            20.0f, 21.0f,
+            22.0f, 23.0f,
+
+
+            24.0f, 25.0f,
+            26.0f, 27.0f,
+
+            28.0f, 29.0f,
+            30.0f, 31.0f,
+        },
+        outputTensorInfo);
+
+    return SimpleReshapeTestImpl<T, 5>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected);
+}
+
+//
+// Explicit template instantiations
+//
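+// The test templates are now defined in this .cpp file, so every data type used
+// by the backends has to be instantiated explicitly for the linker to find the
+// symbols.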
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+SimpleReshapeTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+SimpleReshapeTest<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+SimpleReshapeTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 5>
+Reshape5dTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 5>
+Reshape5dTest<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 5>
+Reshape5dTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/ReshapeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReshapeTestImpl.hpp
new file mode 100644 (file)
index 0000000..fb0bb33
--- /dev/null
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleReshapeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> Reshape5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/ResizeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ResizeTestImpl.hpp
new file mode 100644 (file)
index 0000000..bb2392f
--- /dev/null
@@ -0,0 +1,1012 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <Permute.hpp>
+#include <ResolveType.hpp>
+#include <TensorUtils.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+//
+// ResizeBilinear
+//
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeBilinearNopTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.5f);
+        inputTensorInfo.SetQuantizationOffset(-3);
+        outputTensorInfo.SetQuantizationScale(1.5f);
+        outputTensorInfo.SetQuantizationOffset(-3);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1, 2, 3, 4,
+                2, 3, 4, 5,
+                3, 4, 5, 6,
+                4, 5, 6, 7
+            }
+        : std::initializer_list<float>
+            {
+                1.0f, 2.0f, 3.0f, 4.0f,
+                2.0f, 3.0f, 4.0f, 5.0f,
+                3.0f, 4.0f, 5.0f, 6.0f,
+                4.0f, 5.0f, 6.0f, 7.0f,
+
+                1.0f, 2.0f, 3.0f, 4.0f,
+                2.0f, 3.0f, 4.0f, 5.0f,
+                3.0f, 4.0f, 5.0f, 6.0f,
+                4.0f, 5.0f, 6.0f, 7.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
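+    // armnn::PermutationVector maps each source dimension to a destination index,
+    // so { 0, 3, 1, 2 } sends C (source dim 1) to position 3, i.e. NCHW -> NHWC.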
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = input;
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleResizeBilinearTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 1, 1, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 1, 1, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.1567f);
+        inputTensorInfo.SetQuantizationOffset(1);
+        outputTensorInfo.SetQuantizationScale(0.1567f);
+        outputTensorInfo.SetQuantizationOffset(1);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                  1, 255,
+                200, 250
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f, 255.0f,
+                200.0f, 250.0f,
+
+                250.0f, 200.0f,
+                250.0f,   1.0f
+            };
+
+    // The 'resize bilinear' operation projects the top-left corner of output texels into the input image,
+    // then figures out the interpolants and weights. Note this is different to projecting the centre of the
+    // output texel. Thus, for an input matrix of 2x2, we'll expect the output 1x1 matrix to contain, as
+    // its single element, the value that was at position (0,0) of the input matrix (rather than an average,
+    // which we would expect if projecting the centre).
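+    // Under this convention srcX = dstX * (inputWidth / outputWidth), so for the
+    // 2x2 -> 1x1 case here the single output texel projects exactly onto input (0,0).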
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,
+
+                250.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr <armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeBilinearSqMinTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(3.141592f);
+        inputTensorInfo.SetQuantizationOffset(3);
+        outputTensorInfo.SetQuantizationScale(3.141592f);
+        outputTensorInfo.SetQuantizationOffset(3);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1, 2, 3, 4,
+                2, 3, 4, 5,
+                3, 4, 5, 6,
+                4, 5, 6, 7
+            }
+        : std::initializer_list<float>
+            {
+                1.0f, 2.0f, 3.0f, 4.0f,
+                2.0f, 3.0f, 4.0f, 5.0f,
+                3.0f, 4.0f, 5.0f, 6.0f,
+                4.0f, 5.0f, 6.0f, 7.0f,
+
+                7.0f, 6.0f, 5.0f, 4.0f,
+                6.0f, 5.0f, 4.0f, 3.0f,
+                5.0f, 4.0f, 3.0f, 2.0f,
+                4.0f, 3.0f, 2.0f, 1.0f
+            };
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1, 3,
+                3, 5
+            }
+        : std::initializer_list<float>
+            {
+                1.0f, 3.0f,
+                3.0f, 5.0f,
+
+                7.0f, 5.0f,
+                5.0f, 3.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr <armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeBilinearMinTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 2, 3, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 1, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 2, 3, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.5f);
+        inputTensorInfo.SetQuantizationOffset(-1);
+        outputTensorInfo.SetQuantizationScale(1.5f);
+        outputTensorInfo.SetQuantizationOffset(-1);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                3.0f,  4.5f,  6.0f, // 1,  2,  3, : Expected quantised values
+                9.0f, 13.5f, 21.0f  // 5,  8, 13
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,   2.0f,   3.0f,   5.0f,   8.0f,
+                 13.0f,  21.0f,  34.0f,  55.0f,  89.0f,
+                144.0f, 233.0f, 377.0f, 610.0f, 987.0f,
+
+                987.0f, 610.0f, 377.0f, 233.0f, 144.0f,
+                 89.0f,  55.0f,  34.0f,  21.0f,  13.0f,
+                  8.0f,   5.0f,   3.0f,   2.0f,   1.0f
+            };
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                3.0f, 5.25f // 1, 3
+            }
+        : std::initializer_list<float>
+            {
+                 1.0f,   2.6666f,   6.00f,
+                78.5f, 179.3333f, 401.00f,
+
+                987.0f, 454.6670f, 203.33f,
+                 48.5f,  22.3333f,  10.00f
+            };
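+    // For example, output (0,1) in the Float32 case projects to srcX = 1 * (5/3),
+    // interpolating row 0 between 2.0f and 3.0f: 2.0f + (2/3) * 1.0f = 2.6666f.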
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeBilinearMagTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 3, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 3, 2, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 3, 5, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.010765f);
+        inputTensorInfo.SetQuantizationOffset(7);
+        outputTensorInfo.SetQuantizationScale(0.010132f);
+        outputTensorInfo.SetQuantizationOffset(-18);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                0.183005f, 2.379065f, // 24, 228, : Expected quantised values
+                1.054970f, 1.302565f, // 105, 128,
+                2.400595f, 0.688960f  // 230, 71
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,   2.0f,
+                 13.0f,  21.0f,
+                144.0f, 233.0f,
+
+                233.0f, 144.0f,
+                 21.0f,  13.0f,
+                  2.0f,   1.0f
+            };
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                0.18300501f, 1.06142902f, 1.93985295f, 2.37906504f, 2.37906504f,
+                1.05497003f, 1.15400803f, 1.25304604f, 1.30256498f, 1.30256498f,
+                2.40059495f, 1.71594095f, 1.03128707f, 0.68896002f, 0.68896002f
+                // 0, 87, 173, 217, 217, : Expected quantised values
+                // 86, 96, 106, 111, 111,
+                // 219, 151, 84, 50, 50
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,   1.4f,   1.8f,   2.0f,   2.0f,
+                 13.0f,  16.2f,  19.4f,  21.0f,  21.0f,
+                144.0f, 179.6f, 215.2f, 233.0f, 233.0f,
+
+                233.0f, 197.4f, 161.8f, 144.0f, 144.0f,
+                 21.0f,  17.8f,  14.6f,  13.0f,  13.0f,
+                  2.0f,   1.6f,   1.2f,   1.0f,   1.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr <armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::Bilinear;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+//
+// ResizeNearestNeighbor
+//
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeNearestNeighborNopTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.5f);
+        inputTensorInfo.SetQuantizationOffset(-3);
+        outputTensorInfo.SetQuantizationScale(1.5f);
+        outputTensorInfo.SetQuantizationOffset(-3);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1, 2, 3, 4,
+                2, 3, 4, 5,
+                3, 4, 5, 6,
+                4, 5, 6, 7
+            }
+        : std::initializer_list<float>
+            {
+                1.0f, 2.0f, 3.0f, 4.0f,
+                2.0f, 3.0f, 4.0f, 5.0f,
+                3.0f, 4.0f, 5.0f, 6.0f,
+                4.0f, 5.0f, 6.0f, 7.0f,
+
+                1.0f, 2.0f, 3.0f, 4.0f,
+                2.0f, 3.0f, 4.0f, 5.0f,
+                3.0f, 4.0f, 5.0f, 6.0f,
+                4.0f, 5.0f, 6.0f, 7.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = input;
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleResizeNearestNeighborTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 1, 1, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 1, 1, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.1567f);
+        inputTensorInfo.SetQuantizationOffset(1);
+        outputTensorInfo.SetQuantizationScale(0.1567f);
+        outputTensorInfo.SetQuantizationOffset(1);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                  1, 255,
+                200, 250
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f, 255.0f,
+                200.0f, 250.0f,
+
+                250.0f, 200.0f,
+                250.0f,   1.0f
+            };
+
+    // The 'resize' operation projects the top-left corner of output texels into the input image,
+    // then figures out the interpolants and weights. Note this is different to projecting the centre of the
+    // output texel. Thus, for an input matrix of 2x2, we'll expect the output 1x1 matrix to contain, as
+    // its single element, the value that was at position (0,0) of the input matrix (rather than an average,
+    // which we would expect if projecting the centre).
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,
+
+                250.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr <armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::NearestNeighbor;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeNearestNeighborSqMinTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 4, 4, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 4, 4, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 2, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 2, 2, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(3.141592f);
+        inputTensorInfo.SetQuantizationOffset(3);
+        outputTensorInfo.SetQuantizationScale(3.141592f);
+        outputTensorInfo.SetQuantizationOffset(3);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1, 2, 3, 4,
+                2, 3, 4, 5,
+                3, 4, 5, 6,
+                4, 5, 6, 7
+            }
+        : std::initializer_list<float>
+            {
+                1.0f, 2.0f, 3.0f, 4.0f,
+                2.0f, 3.0f, 4.0f, 5.0f,
+                3.0f, 4.0f, 5.0f, 6.0f,
+                4.0f, 5.0f, 6.0f, 7.0f,
+
+                7.0f, 6.0f, 5.0f, 4.0f,
+                6.0f, 5.0f, 4.0f, 3.0f,
+                5.0f, 4.0f, 3.0f, 2.0f,
+                4.0f, 3.0f, 2.0f, 1.0f
+            };
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                1, 3,
+                3, 5
+            }
+        : std::initializer_list<float>
+            {
+                1.0f, 3.0f,
+                3.0f, 5.0f,
+
+                7.0f, 5.0f,
+                5.0f, 3.0f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr <armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    descriptor.m_Parameters.m_Method     = armnn::ResizeMethod::NearestNeighbor;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeNearestNeighborMinTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 2, 3, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 1, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 2, 3, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.5f);
+        inputTensorInfo.SetQuantizationOffset(-1);
+        outputTensorInfo.SetQuantizationScale(1.5f);
+        outputTensorInfo.SetQuantizationOffset(-1);
+    }
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                3.0f,  4.5f,  6.0f, // 1,  2,  3, : Expected quantised values
+                9.0f, 13.5f, 21.0f  // 5,  8, 13
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,   2.0f,   3.0f,   5.0f,   8.0f,
+                 13.0f,  21.0f,  34.0f,  55.0f,  89.0f,
+                144.0f, 233.0f, 377.0f, 610.0f, 987.0f,
+
+                987.0f, 610.0f, 377.0f, 233.0f, 144.0f,
+                 89.0f,  55.0f,  34.0f,  21.0f,  13.0f,
+                  8.0f,   5.0f,   3.0f,   2.0f,   1.0f
+            };
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                3.0f, 4.5f // 1, 3
+            }
+        : std::initializer_list<float>
+            {
+                  1.f,   2.f,   5.f,
+                 13.f,  21.f,  55.f,
+
+                987.f, 610.f, 233.f,
+                 89.f,  55.f,  21.f
+            };
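+    // Nearest neighbour selects input[floor(dst * scale)]: with scaleW = 5/3,
+    // dstX = 0, 1, 2 map to srcX = 0, 1, 3, picking 1.f, 2.f and 5.f from row 0.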
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> ResizeNearestNeighborMagTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::DataLayout dataLayout,
+        float inQuantScale,
+        int32_t inQuantOffset,
+        float outQuantScale,
+        int32_t outQuantOffset)
+{
+    armnn::TensorInfo inputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 3, 2, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 3, 2, dataLayout, ArmnnType);
+
+    armnn::TensorInfo outputTensorInfo = armnn::IsQuantizedType<T>()
+        ?  armnnUtils::GetTensorInfo(1, 1, 3, 5, dataLayout, ArmnnType)
+        :  armnnUtils::GetTensorInfo(1, 2, 3, 5, dataLayout, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(inQuantScale);
+        inputTensorInfo.SetQuantizationOffset(inQuantOffset);
+        outputTensorInfo.SetQuantizationScale(outQuantScale);
+        outputTensorInfo.SetQuantizationOffset(outQuantOffset);
+    }
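+
+    // Input and output deliberately use different quantization parameters here,
+    // so the workload must requantize values rather than copy them through.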
+
+    std::vector<float> inputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                0.183005f, 2.379065f, //  24, 228, : expected quantised values
+                1.054970f, 1.302565f, // 105, 128,
+                2.400595f, 0.688960f  // 230, 71
+            }
+        : std::initializer_list<float>
+            {
+                  1.0f,   2.0f,
+                 13.0f,  21.0f,
+                144.0f, 233.0f,
+
+                233.0f, 144.0f,
+                 21.0f,  13.0f,
+                  2.0f,   1.0f
+            };
+
+    std::vector<float> outputData = armnn::IsQuantizedType<T>()
+        ? std::initializer_list<float>
+            {
+                0.183005f, 0.183005f, 0.183005f, 2.379065f, 2.379065f,
+                1.054970f, 1.054970f, 1.054970f, 1.302565f, 1.302565f,
+                2.400595f, 2.400595f, 2.400595f, 0.688960f, 0.688960f
+            }
+        : std::initializer_list<float>
+            {
+                  1.f,   1.f,   1.f,   2.f,   2.f,
+                 13.f,  13.f,  13.f,  21.f,  21.f,
+                144.f, 144.f, 144.f, 233.f, 233.f,
+
+                233.f, 233.f, 233.f, 144.f, 144.f,
+                 21.f,  21.f,  21.f,  13.f,  13.f,
+                  2.f,   2.f,   2.f,   1.f,   1.f
+            };
+
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (dataLayout == armnn::DataLayout::NHWC)
+    {
+        std::vector<float> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
+        inputData = tmp;
+
+        std::vector<float> tmp1(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
+        outputData = tmp1;
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
+                                                                      inputTensorInfo.GetQuantizationOffset(),
+                                                                      inputData));
+
+    LayerTestResult<T, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+                                             QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
+                                                                outputTensorInfo.GetQuantizationOffset(),
+                                                                outputData));
+
+    std::unique_ptr <armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr <armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ResizeQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_DataLayout = dataLayout;
+    descriptor.m_Parameters.m_Method = armnn::ResizeMethod::NearestNeighbor;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResize(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+    return result;
+}
diff --git a/src/backends/backendsCommon/test/layerTests/RsqrtTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/RsqrtTestImpl.cpp
new file mode 100644 (file)
index 0000000..c835ff2
--- /dev/null
@@ -0,0 +1,256 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReshapeTestImpl.hpp"
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Rsqrt2dTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::TensorInfo inputTensorInfo,
+    const armnn::TensorInfo outputTensorInfo,
+    const std::vector<float>& inputValues,
+    const std::vector<float>& expectedOutputValues)
+{
+    auto inputTensor = MakeTensor<T, 2>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputValues, inputTensorInfo));
+
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
+                                             ConvertToDataType<ArmnnType>(expectedOutputValues, outputTensorInfo));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::RsqrtQueueDescriptor descriptor;
+
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateRsqrt(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
+
+    return result;
+}
+
+} // anonymous namespace
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> Rsqrt2dTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const armnn::TensorShape inputShape{ 2, 2 };
+    const armnn::TensorShape outputShape{ 2, 2 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+    inputTensorInfo.SetQuantizationOffset(0);
+
+    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(0.1f);
+    outputTensorInfo.SetQuantizationOffset(0);
+
+    std::vector<float> inputValues
+    {
+        1.f, 4.f,
+        16.f, 25.f
+    };
+
+    std::vector<float> expectedOutputValues
+    {
+        1.f, 0.5f,
+        0.25f, 0.2f
+    };
+
+    return Rsqrt2dTestCommon<ArmnnType>(workloadFactory, memoryManager,
+                                inputTensorInfo, outputTensorInfo,
+                                inputValues, expectedOutputValues);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> Rsqrt3dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const armnn::TensorShape inputShape{ 3, 1, 2 };
+    const armnn::TensorShape outputShape{ 3, 1, 2 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+    inputTensorInfo.SetQuantizationOffset(0);
+
+    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(0.1f);
+    outputTensorInfo.SetQuantizationOffset(0);
+
+    std::vector<float> inputValues
+    {
+        1.f, 4.f, 16.f,
+        25.f, 64.f, 100.f
+    };
+
+    std::vector<float> expectedOutputValues
+    {
+        1.f, 0.5f, 0.25f,
+        0.2f, 0.125f, 0.1f
+    };
+
+    auto inputTensor = MakeTensor<T, 3>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputValues, inputTensorInfo));
+
+    LayerTestResult<T, 3> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo,
+                                             ConvertToDataType<ArmnnType>(expectedOutputValues, outputTensorInfo));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::RsqrtQueueDescriptor descriptor;
+
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateRsqrt(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
+
+    return result;
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> RsqrtZeroTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const armnn::TensorShape inputShape{ 1, 2 };
+    const armnn::TensorShape outputShape{ 1, 2 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+
+    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(0.1f);
+
+    std::vector<float> inputValues
+    {
+        0.f, -0.f
+    };
+
+    std::vector<float> expectedOutputValues
+    {
+        INFINITY, -INFINITY
+    };
+
+    return Rsqrt2dTestCommon<ArmnnType>(workloadFactory, memoryManager,
+                                inputTensorInfo, outputTensorInfo,
+                                inputValues, expectedOutputValues);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> RsqrtNegativeTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const armnn::TensorShape inputShape{ 1, 2 };
+    const armnn::TensorShape outputShape{ 1, 2 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+    inputTensorInfo.SetQuantizationOffset(0);
+
+    armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(0.1f);
+    outputTensorInfo.SetQuantizationOffset(0);
+
+    std::vector<float> inputValues
+    {
+        -25.f, -16.f
+    };
+
+    std::vector<float> expectedOutputValues
+    {
+        -NAN, -NAN
+    };
+
+    return Rsqrt2dTestCommon<ArmnnType>(workloadFactory, memoryManager,
+                                inputTensorInfo, outputTensorInfo,
+                                inputValues, expectedOutputValues);
+}
+
+//
+// Explicit template instantiations
+//
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+Rsqrt2dTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 2>
+Rsqrt2dTest<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 2>
+Rsqrt2dTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+Rsqrt3dTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 3>
+Rsqrt3dTest<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 3>
+Rsqrt3dTest<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+RsqrtZeroTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+RsqrtNegativeTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
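Note on the pattern above: the refactor moves each template body out of the umbrella header into its own .cpp and relies on explicit instantiation, so the header can stay declaration-only and rebuilds no longer recompile every test. A minimal sketch of the mechanism, using hypothetical names that are not part of this patch:

    // widget.hpp - declares the template only; includers never see the body.
    template <typename T>
    T Twice(T value);

    // widget.cpp - defines the template and explicitly instantiates the
    // types callers are allowed to use, exactly as RsqrtTestImpl.cpp does.
    template <typename T>
    T Twice(T value)
    {
        return value + value;
    }

    template int   Twice<int>(int);     // symbol emitted into widget.o
    template float Twice<float>(float); // linker resolves callers to these

Calls with any type not instantiated in the .cpp fail at link time, which is why the file ends with one instantiation per supported DataType.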
diff --git a/src/backends/backendsCommon/test/layerTests/RsqrtTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/RsqrtTestImpl.hpp
new file mode 100644 (file)
index 0000000..e5a5340
--- /dev/null
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> Rsqrt2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Rsqrt3dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> RsqrtZeroTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> RsqrtNegativeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
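With the declarations split out this way, per-backend suites keep registering the cases through LayerTests.hpp as before. A hedged usage sketch, assuming the ARMNN_AUTO_TEST_CASE helper the existing backend test files use (case names here are illustrative):

    #include <backendsCommon/test/LayerTests.hpp>

    ARMNN_AUTO_TEST_CASE(Rsqrt2d,       Rsqrt2dTest<armnn::DataType::Float32>)
    ARMNN_AUTO_TEST_CASE(Rsqrt3d,       Rsqrt3dTest<armnn::DataType::Float32>)
    ARMNN_AUTO_TEST_CASE(RsqrtZero,     RsqrtZeroTest<armnn::DataType::Float32>)
    ARMNN_AUTO_TEST_CASE(RsqrtNegative, RsqrtNegativeTest<armnn::DataType::Float32>)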
diff --git a/src/backends/backendsCommon/test/layerTests/SoftmaxTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/SoftmaxTestImpl.cpp
new file mode 100644 (file)
index 0000000..49184ed
--- /dev/null
@@ -0,0 +1,682 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SoftmaxTestImpl.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+#include <algorithm>
+
+namespace
+{
+
+struct Simple3dSoftmaxOutputData
+{
+    const std::vector<float> outputData =
+    {
+        0.0964599f, 0.26220518f, 0.0964599f, 0.0964599f,
+        0.15903549f, 0.0964599f, 0.0964599f, 0.0964599f
+    };
+
+    const armnn::TensorShape inputShape{ 1, 8, 1 };
+
+    const std::vector<float> inputData =
+    {
+        0.0f, 1.0f, 0.0f, 0.0f,
+        0.5f, 0.0f, 0.0f, 0.0f,
+    };
+};
+
+struct Simple4dSoftmaxData
+{
+    const armnn::TensorShape inputShape{ 1, 8, 1, 1 };
+
+    const std::vector<float> outputData =
+    {
+        0.0964599f, 0.26220518f, 0.0964599f, 0.0964599f,
+        0.15903549f, 0.0964599f, 0.0964599f, 0.0964599f
+    };
+
+    const std::vector<float> inputData =
+    {
+         0.0f, 1.0f, 0.0f, 0.0f,
+         0.5f, 0.0f, 0.0f, 0.0f
+    };
+};
+
+template<armnn::DataType ArmnnType, std::size_t n, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, n> SimpleSoftmaxBaseTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta,
+    const armnn::TensorShape& inputShape,
+    const std::vector<float>& outputData,
+    const std::vector<float>& inputData,
+    int axis = 1)
+{
+    using std::exp;
+
+    const float qScale = 1.f / 256.f;
+    const int qOffset = 0;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    inputTensorInfo = armnn::TensorInfo(inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(qScale);
+    inputTensorInfo.SetQuantizationOffset(qOffset);
+
+    outputTensorInfo = armnn::TensorInfo(inputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(qScale);
+    outputTensorInfo.SetQuantizationOffset(qOffset);
+
+    LayerTestResult<T, n> ret(outputTensorInfo);
+
+    // Each row is independently softmax'd.
+    auto input = MakeTensor<T, n>(inputTensorInfo, std::vector<T>(
+        QuantizedVector<T>(qScale, qOffset, inputData)));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::SoftmaxQueueDescriptor data;
+    data.m_Parameters.m_Beta = beta;
+    data.m_Parameters.m_Axis = axis;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    BOOST_ASSERT(workload);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
+
+    std::vector<T> expectedOutput = std::vector<T>(
+            QuantizedVector<T>(qScale, qOffset, outputData));
+    ret.outputExpected = MakeTensor<T, n>(outputTensorInfo, expectedOutput);
+
+    return ret;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> SimpleSoftmaxTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta)
+{
+    using std::exp;
+    const armnn::TensorShape inputShape{ 2, 4 };
+
+    float x0[4] = { exp((0.f - 1.0f) * beta), exp((1.0f - 1.0f) * beta),
+                    exp((0.0f - 1.0f) * beta), exp((0.0f - 1.0f) * beta) };
+    float sum0 = x0[0] + x0[1] + x0[2] + x0[3];
+    float x1[4] = { exp((0.5f - 0.5f) * beta), exp((0.0f - 0.5f) * beta),
+                    exp((0.0f - 0.5f) * beta), exp((0.0f - 0.5f) * beta) };
+    float sum1 = x1[0] + x1[1] + x1[2] + x1[3];
+
+    const std::vector<float> outputData = { x0[0] / sum0, x0[1] / sum0, x0[2] / sum0, x0[3] / sum0,
+                                            x1[0] / sum1, x1[1] / sum1, x1[2] / sum1, x1[3] / sum1 };
+
+    const std::vector<float> inputData =
+            {
+                0.f, 1.f, 0.f, 0.f,
+                .5f, 0.f, 0.f, 0.f,
+            };
+
+    return SimpleSoftmaxBaseTestImpl<ArmnnType, 2>(workloadFactory, memoryManager, beta,
+                                                   inputShape, outputData, inputData);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> SimpleSoftmaxTestImpl(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis)
+{
+    armnn::TensorShape inputShape;
+    std::vector<float> inputData;
+    std::vector<float> outputData;
+    switch (axis)
+    {
+    case -2:
+    case 0:
+        {
+        inputShape = {5, 2};
+
+        inputData =
+                {
+                        17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
+                };
+
+        outputData =
+                {
+                        0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                        0.087144312427294f,
+                        0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                        7.246299848982885e-08f
+                };
+        break;
+        }
+    case -1:
+    case 1:
+        {
+        inputShape = {2, 5};
+
+        inputData =
+                {
+                        17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
+                };
+
+        outputData =
+                {
+                        0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                        7.246299848982885e-08f,
+                        0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                        7.246299848982885e-08f
+                };
+        break;
+        }
+    }
+    return SimpleSoftmaxBaseTestImpl<ArmnnType, 2>(workloadFactory, memoryManager, beta,
+                                                   inputShape, outputData, inputData, axis);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Simple3dSoftmaxTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta,
+    const armnn::TensorShape& inputShape,
+    const std::vector<float>& outputData,
+    const std::vector<float>& inputData,
+    int axis = 1)
+{
+    return SimpleSoftmaxBaseTestImpl<ArmnnType, 3>(workloadFactory, memoryManager, beta,
+                                                   inputShape, outputData, inputData, axis);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Simple4dSoftmaxTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta,
+    const armnn::TensorShape& inputShape,
+    const std::vector<float>& outputData,
+    const std::vector<float>& inputData,
+    int axis = 1)
+{
+
+    return SimpleSoftmaxBaseTestImpl<ArmnnType, 4>(workloadFactory, memoryManager, beta,
+                                                   inputShape, outputData, inputData, axis);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> CompareSoftmaxTestImpl(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        armnn::IWorkloadFactory& refWorkloadFactory,
+        float beta)
+{
+
+    const int batchSize = 20;
+    const int channels = 30;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = { batchSize, channels };
+
+    inputTensorInfo = armnn::TensorInfo(2, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(2, inputShape, ArmnnType);
+    float qScale = 1.f / 256.f;
+    int qOffset = 0;
+    inputTensorInfo.SetQuantizationScale(qScale);
+    inputTensorInfo.SetQuantizationOffset(qOffset);
+    outputTensorInfo.SetQuantizationScale(qScale);
+    outputTensorInfo.SetQuantizationOffset(qOffset);
+
+
+    LayerTestResult<T, 2> ret(outputTensorInfo);
+    auto input = MakeRandomTensor<T, 2>(inputTensorInfo, 0xF00D, 0.0f, 1.0f);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::SoftmaxQueueDescriptor data;
+    data.m_Parameters.m_Beta = beta;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
+
+
+    armnn::SoftmaxQueueDescriptor refData = data;
+    armnn::WorkloadInfo refInfo = info;
+    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
+    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info);
+    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateSoftmax(refData, refInfo);
+
+    outputHandleRef->Allocate();
+    inputHandleRef->Allocate();
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
+    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]);
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    workloadRef->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+    CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get());
+
+    return ret;
+}
+
+} // anonymous namespace
+
+LayerTestResult<float,2> SimpleSoftmaxTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta)
+{
+    return SimpleSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta);
+}
+
+LayerTestResult<float,2> SimpleAxisSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis)
+{
+    return SimpleSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, axis);
+}
+
+LayerTestResult<float,3> Simple3dSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    Simple3dSoftmaxOutputData data;
+    return Simple3dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta,
+                                                             data.inputShape, data.outputData, data.inputData);
+}
+
+LayerTestResult<float,3> Simple3dAxisSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis)
+{
+    armnn::TensorShape inputShape;
+    std::vector<float> inputData;
+    std::vector<float> outputData;
+    switch (axis)
+    {
+    case -3:
+    case 0:
+        {
+            inputShape = {5, 2, 2};
+
+            inputData =
+                    {
+                            17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
+
+                            15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
+                            0.236882800924671f,
+                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.087144312427294f,
+                            0.087144312427294f,
+
+                            0.087144312427294f, 0.087144312427294f, 0.032058600957022f, 0.032058600957022f,
+                            0.032058600957022f,
+                            0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f
+                    };
+            break;
+        }
+    case -2:
+    case 1:
+        {
+            inputShape = {2, 5, 2};
+
+            inputData =
+                    {
+                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
+
+                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f,
+
+                            0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f
+                    };
+        break;
+        }
+    case -1:
+    case 2:
+        {
+            inputShape = {2, 2, 5};
+
+            inputData =
+                    {
+                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
+                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f
+                    };
+            break;
+        }
+    }
+
+    return Simple3dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta,
+                                                             inputShape, outputData, inputData, axis);
+}
+
+LayerTestResult<float,4> Simple4dSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    Simple4dSoftmaxData data;
+    return Simple4dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, data.inputShape,
+                                                             data.outputData, data.inputData);
+}
+
+LayerTestResult<float,4> Simple4dAxisSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis)
+{
+    armnn::TensorShape inputShape;
+    std::vector<float> inputData;
+    std::vector<float> outputData;
+    switch (axis)
+    {
+    case -4:
+    case 0:
+        {
+            inputShape = {5, 2, 2, 2};
+
+            inputData =
+                    {
+                            17.0f, -1.0f, 17.0f, -1.0f, 17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f,
+                            16.0f, -2.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f, 15.0f, -3.0f,
+                            15.0f, -3.0f, 15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 14.0f, -4.0f,
+                            14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f, 1.0f, -17.0f, 1.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
+                            0.643914213228014f,
+                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.236882800924671f,
+                            0.236882800924671f,
+                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
+                            0.236882800924671f,
+                            0.236882800924671f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
+                            0.087144312427294f,
+
+                            0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
+                            0.032058600957022f,
+                            0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
+                            0.032058600957022f,
+                            0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f,
+                            7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f, 7.246299848982885e-08f
+                    };
+            break;
+        }
+    case -3:
+    case 1:
+        {
+            inputShape = {2, 5, 2, 2};
+
+            inputData =
+                    {
+                            17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
+                            15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f,
+                            17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
+                            15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
+                            0.236882800924671f,
+                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.087144312427294f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.087144312427294f, 0.032058600957022f, 0.032058600957022f,
+                            0.032058600957022f,
+                            0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f,
+
+
+                            0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
+                            0.236882800924671f,
+                            0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.087144312427294f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.087144312427294f, 0.032058600957022f, 0.032058600957022f,
+                            0.032058600957022f,
+                            0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f
+                    };
+            break;
+        }
+    case -2:
+    case 2:
+        {
+            inputShape = {2, 2, 5, 2};
+
+            inputData =
+                    {
+                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
+                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
+                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
+                            17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f,
+
+                            0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
+                            0.087144312427294f,
+                            0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
+                            7.246299848982885e-08f
+                    };
+            break;
+        }
+    case -1:
+    case 3:
+        {
+            inputShape = {2, 2, 2, 5};
+
+            inputData =
+                    {
+                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
+                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
+                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
+                            17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
+                    };
+
+            outputData =
+                    {
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f,
+                            0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
+                            7.246299848982885e-08f
+                    };
+            break;
+        }
+    }
+
+    return Simple4dSoftmaxTestImpl<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        beta,
+        inputShape,
+        outputData,
+        inputData,
+        axis);
+}
+
+LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta)
+{
+    return SimpleSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta);
+}
+
+LayerTestResult<uint8_t,3> Simple3dSoftmaxUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    Simple3dSoftmaxOutputData data;
+    return Simple3dSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        beta,
+        data.inputShape,
+        data.outputData,
+        data.inputData);
+}
+
+LayerTestResult<uint8_t,4> Simple4dSoftmaxUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    Simple4dSoftmaxData data;
+
+    return Simple4dSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta,
+                                                                     data.inputShape, data.outputData, data.inputData);
+}
+
+LayerTestResult<int16_t,2> SimpleSoftmaxUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    return SimpleSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta);
+}
+
+LayerTestResult<int16_t,3> Simple3dSoftmaxUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    Simple3dSoftmaxOutputData data;
+    return Simple3dSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta,
+                                                                     data.inputShape, data.outputData, data.inputData);
+}
+
+LayerTestResult<int16_t,4> Simple4dSoftmaxUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta)
+{
+    Simple4dSoftmaxData data;
+
+    return Simple4dSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta,
+                                                                     data.inputShape, data.outputData, data.inputData);
+}
+
+LayerTestResult<float,2> CompareSoftmaxTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    float beta)
+{
+    return CompareSoftmaxTestImpl<armnn::DataType::Float32>(
+        workloadFactory, memoryManager, refWorkloadFactory, beta);
+}
+
+LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    float beta)
+{
+    return CompareSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory, memoryManager, refWorkloadFactory, beta);
+}
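For the record, the long literal vectors in the axis tests above are just softmax over the row { 17, 16, 15, 14, 1 } with beta = 1, repeated per slice. A self-contained sketch for regenerating them (not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Numerically stable reference softmax matching the m_Beta semantics:
    // out[i] = exp(beta * (x[i] - max)) / sum_j exp(beta * (x[j] - max))
    std::vector<float> ReferenceSoftmax(const std::vector<float>& x, float beta)
    {
        const float maxVal = *std::max_element(x.begin(), x.end());

        std::vector<float> out(x.size());
        float sum = 0.0f;
        for (std::size_t i = 0; i < x.size(); ++i)
        {
            out[i] = std::exp(beta * (x[i] - maxVal));
            sum   += out[i];
        }
        for (float& v : out)
        {
            v /= sum;
        }
        return out;
    }

    // ReferenceSoftmax({ 17.f, 16.f, 15.f, 14.f, 1.f }, 1.0f) yields
    // { 0.6439142f, 0.2368828f, 0.0871443f, 0.0320586f, 7.24630e-08f }.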
diff --git a/src/backends/backendsCommon/test/layerTests/SoftmaxTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SoftmaxTestImpl.hpp
new file mode 100644 (file)
index 0000000..96f5fb9
--- /dev/null
@@ -0,0 +1,86 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 2> SimpleSoftmaxTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta);
+
+LayerTestResult<float, 2> SimpleAxisSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis);
+
+LayerTestResult<float, 3> Simple3dSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<float, 3> Simple3dAxisSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis);
+
+LayerTestResult<float, 4> Simple4dSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<float, 4> Simple4dAxisSoftmaxTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta,
+        int axis);
+
+LayerTestResult<uint8_t, 2> SimpleSoftmaxUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    float beta);
+
+LayerTestResult<uint8_t, 3> Simple3dSoftmaxUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<uint8_t, 4> Simple4dSoftmaxUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<int16_t, 2> SimpleSoftmaxUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<int16_t, 3> Simple3dSoftmaxUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<int16_t, 4> Simple4dSoftmaxUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        float beta);
+
+LayerTestResult<float, 2> CompareSoftmaxTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    float beta);
+
+LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::IWorkloadFactory& refWorkloadFactory,
+    float beta);
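One detail worth noting in the implementations above: qScale is fixed at 1/256 with qOffset 0 because softmax outputs lie in [0, 1), so the 256 QuantisedAsymm8 levels cover the output range exactly. A hedged sketch of the affine rule the QuantizedVector helper applies (see armnn::Quantize for the authoritative version):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // q = round(value / scale) + offset, clamped to the QAsymm8 range.
    uint8_t QuantizeToU8(float value, float scale, int32_t offset)
    {
        int32_t q = static_cast<int32_t>(std::round(value / scale)) + offset;
        q = std::min<int32_t>(std::max<int32_t>(q, 0), 255);
        return static_cast<uint8_t>(q);
    }

    // QuantizeToU8(0.643914f, 1.0f / 256.0f, 0) == 165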
diff --git a/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SpaceToBatchNdTestImpl.cpp
@@ -2,20 +2,22 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
+#include "SpaceToBatchNdTestImpl.hpp"
+
+#include <Permute.hpp>
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
+namespace
+{
+
 template<typename T>
 LayerTestResult<T, 4> SpaceToBatchNdTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
@@ -237,7 +239,7 @@ LayerTestResult<T, 4> SpaceToBatchNdPaddingTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SpaceToBatchNdSimpleNHWCTest(
+LayerTestResult<T, 4> SpaceToBatchNdSimpleNhwcTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -245,7 +247,7 @@ LayerTestResult<T, 4> SpaceToBatchNdSimpleNHWCTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SpaceToBatchNdMultiChannelsNHWCTest(
+LayerTestResult<T, 4> SpaceToBatchNdMultiChannelsNhwcTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -253,7 +255,7 @@ LayerTestResult<T, 4> SpaceToBatchNdMultiChannelsNHWCTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SpaceToBatchNdMultiBlockNHWCTest(
+LayerTestResult<T, 4> SpaceToBatchNdMultiBlockNhwcTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -261,9 +263,179 @@ LayerTestResult<T, 4> SpaceToBatchNdMultiBlockNHWCTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> SpaceToBatchNdPaddingNHWCTest(
+LayerTestResult<T, 4> SpaceToBatchNdPaddingNhwcTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
     return SpaceToBatchNdPaddingTest<ArmnnType>(workloadFactory, memoryManager, armnn::DataLayout::NHWC);
 }
+
+} // anonymous namespace
+
+LayerTestResult<float, 4> SpaceToBatchNdSimpleFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdSimpleTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiChannelsTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiBlockFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiBlockTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdPaddingFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdPaddingTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiChannelsTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiBlockTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdPaddingTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdSimpleNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdSimpleNhwcTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiChannelsNhwcTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiBlockNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiBlockNhwcTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToBatchNdPaddingNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdPaddingNhwcTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdSimpleNhwcTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiChannelsNhwcTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiBlockNhwcTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdPaddingNhwcTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdSimpleTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiChannelsTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiBlockTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdPaddingTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdSimpleNhwcTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiChannelsNhwcTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdMultiBlockNhwcTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToBatchNdPaddingNhwcTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/SpaceToBatchNdTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SpaceToBatchNdTestImpl.hpp
new file mode 100644 (file)
index 0000000..0af99c5
--- /dev/null
@@ -0,0 +1,106 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> SpaceToBatchNdSimpleFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiBlockFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdPaddingFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdSimpleNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdMultiBlockNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToBatchNdPaddingNhwcFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingNhwcUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingNhwcUint16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
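All of the SpaceToBatchNd variants declared above (Simple, MultiChannels, MultiBlock, Padding, in both NCHW and NHWC) exercise the same shape rule: the spatial block folds into the batch axis. A hypothetical helper, for illustration only:

    // Shape rule for 2 spatial dimensions in NHWC:
    //   batchOut  = batchIn * blockH * blockW
    //   heightOut = (heightIn + total vertical padding)   / blockH
    //   widthOut  = (widthIn  + total horizontal padding) / blockW
    #include <array>

    std::array<unsigned int, 4> SpaceToBatchNdOutputShapeNhwc(
        const std::array<unsigned int, 4>& in, // { N, H, W, C }
        unsigned int blockH, unsigned int blockW,
        unsigned int padH, unsigned int padW)  // total padding per axis
    {
        return { in[0] * blockH * blockW,
                 (in[1] + padH) / blockH,
                 (in[2] + padW) / blockW,
                 in[3] };
    }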
diff --git a/src/backends/backendsCommon/test/SpaceToDepthTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SpaceToDepthTestImpl.cpp
@@ -2,20 +2,22 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
+#include "SpaceToDepthTestImpl.hpp"
+
+#include <Permute.hpp>
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
+namespace
+{
+
 template<typename T>
 LayerTestResult<T, 4> SpaceToDepthTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
@@ -119,7 +121,7 @@ LayerTestResult<T, 4> SpaceToDepthSimpleTest2(
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     armnn::DataLayout dataLayout = armnn::DataLayout::NHWC)
 {
-    unsigned int inputShape[] = {1, 2, 2, 2};
+    unsigned int inputShape[]  = {1, 2, 2, 2};
     unsigned int outputShape[] = {1, 1, 1, 8};
 
     std::vector<float> input = std::vector<float>(
@@ -145,3 +147,81 @@ LayerTestResult<T, 4> SpaceToDepthSimpleTest2(
     return SpaceToDepthTestImpl<T>(
         workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
 }
+
+} // anonymous namespace
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNhwcAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest1<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNchwAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest1<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<float, 4> SpaceToDepthNhwcFloat32Test1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest1<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToDepthNchwFloat32Test1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest1<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<float, 4> SpaceToDepthNhwcFloat32Test2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest2<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToDepthNchwFloat32Test2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest2<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<int16_t, 4> SpaceToDepthNhwcQSymm16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest2<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager);
+}
+
+LayerTestResult<int16_t, 4> SpaceToDepthNchwQSymm16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest2<armnn::DataType::QuantisedSymm16>(
+        workloadFactory,
+        memoryManager,
+        armnn::DataLayout::NCHW);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/SpaceToDepthTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SpaceToDepthTestImpl.hpp
new file mode 100644 (file)
index 0000000..ef86829
--- /dev/null
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNchwAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNhwcAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToDepthNhwcFloat32Test1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToDepthNchwFloat32Test1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToDepthNhwcFloat32Test2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToDepthNchwFloat32Test2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToDepthNhwcQSymm16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> SpaceToDepthNchwQSymm16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -2,20 +2,22 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "WorkloadTestUtils.hpp"
+#include "SplitterTestImpl.hpp"
+
+#include <ResolveType.hpp>
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
 #include <backendsCommon/test/QuantizeHelper.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
+namespace
+{
+
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 std::vector<LayerTestResult<T,3>> SplitterTestCommon(
     armnn::IWorkloadFactory& workloadFactory,
@@ -244,7 +246,6 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(
     return ret;
 }
 
-
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 3> CopyViaSplitterTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
@@ -310,3 +311,47 @@ LayerTestResult<T, 3> CopyViaSplitterTestImpl(
 
     return ret;
 }
+
+} // anonymous namespace
+
+std::vector<LayerTestResult<float,3>> SplitterFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SplitterTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SplitterTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+std::vector<LayerTestResult<int16_t,3>> SplitterInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SplitterTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<float, 3> CopyViaSplitterFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return CopyViaSplitterTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
+}
+
+LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return CopyViaSplitterTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
+}
+
+LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return CopyViaSplitterTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/SplitterTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SplitterTestImpl.hpp
new file mode 100644 (file)
index 0000000..34c5fba
--- /dev/null
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <vector>
+
+std::vector<LayerTestResult<float, 3>> SplitterFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> CopyViaSplitterFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+std::vector<LayerTestResult<uint8_t, 3>> SplitterUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+std::vector<LayerTestResult<int16_t, 3>> SplitterInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/layerTests/StackTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/StackTestImpl.hpp
new file mode 100644 (file)
index 0000000..f063fbb
--- /dev/null
@@ -0,0 +1,506 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
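+// Generic Stack test helper: creates one tensor handle per input, runs a
+// Stack workload that joins the rank-(N-1) inputs into a rank-N output along
+// the given axis, and returns the actual versus expected output data.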
+template<armnn::DataType ArmnnType, typename T, std::size_t outputDimLength>
+LayerTestResult<T, outputDimLength> StackTestHelper(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::TensorInfo& inputTensorInfo,
+        const armnn::TensorInfo& outputTensorInfo,
+        unsigned int axis,
+        const std::vector<std::vector<T>>& inputData,
+        const std::vector<T>& outputExpectedData)
+{
+    unsigned int numInputs = static_cast<unsigned int>(inputData.size());
+    std::vector<boost::multi_array<T, outputDimLength-1>> inputs;
+    for (unsigned int i = 0; i < numInputs; ++i)
+    {
+        inputs.push_back(MakeTensor<T, outputDimLength-1>(inputTensorInfo, inputData[i]));
+    }
+
+    LayerTestResult<T, outputDimLength> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, outputDimLength>(outputTensorInfo, outputExpectedData);
+
+    std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
+    for (unsigned int i = 0; i < numInputs; ++i)
+    {
+        inputHandles.push_back(workloadFactory.CreateTensorHandle(inputTensorInfo));
+    }
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
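+    // Describe the Stack operation: the axis to insert, the (common) shape of
+    // each input tensor, and the number of inputs.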
+    armnn::StackQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Axis = axis;
+    descriptor.m_Parameters.m_InputShape = inputTensorInfo.GetShape();
+    descriptor.m_Parameters.m_NumInputs = numInputs;
+
+    armnn::WorkloadInfo info;
+    for (unsigned int i = 0; i < numInputs; ++i)
+    {
+        std::unique_ptr<armnn::ITensorHandle>& inputHandle = inputHandles[i];
+        AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+        inputHandle->Allocate();
+        CopyDataToITensorHandle(inputHandle.get(), inputs[i].origin());
+    }
+
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+    outputHandle->Allocate();
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateStack(descriptor, info);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+
+} // anonymous namespace
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack0AxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
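+    // Stacking two { 3, 2, 3 } inputs along axis 0 gives a { 2, 3, 2, 3 } output.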
+    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 2, 3, 2, 3 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputData.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18,
+
+
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    };
+
+    return StackTestHelper<ArmnnType, T, 4>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        0U,
+        inputData,
+        outputExpectedData
+    );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack4dOutput1AxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
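+    // Stacking two { 3, 2, 3 } inputs along axis 1 gives a { 3, 2, 2, 3 } output.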
+    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 3, 2, 2, 3 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputData.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        19, 20, 21,
+        22, 23, 24,
+
+
+        7, 8, 9,
+        10, 11, 12,
+
+        25, 26, 27,
+        28, 29, 30,
+
+
+        13, 14, 15,
+        16, 17, 18,
+
+        31, 32, 33,
+        34, 35, 36
+    };
+
+    return StackTestHelper<ArmnnType, T, 4>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        1U,
+        inputData,
+        outputExpectedData
+    );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack4dOutput2AxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
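+    // Stacking two { 3, 2, 3 } inputs along axis 2 gives a { 3, 2, 2, 3 } output.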
+    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 3, 2, 2, 3 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputData.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 2, 3,
+        19, 20, 21,
+
+        4, 5, 6,
+        22, 23, 24,
+
+        7, 8, 9,
+        25, 26, 27,
+
+        10, 11, 12,
+        28, 29, 30,
+
+        13, 14, 15,
+        31, 32, 33,
+
+        16, 17, 18,
+        34, 35, 36
+    };
+
+    return StackTestHelper<ArmnnType, T, 4>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        2U,
+        inputData,
+        outputExpectedData
+    );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> Stack4dOutput3AxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
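+    // Stacking two { 3, 2, 3 } inputs along axis 3 gives a { 3, 2, 3, 2 } output.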
+    armnn::TensorInfo inputTensorInfo ({ 3, 2, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 3, 2, 3, 2 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputData.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 19,
+        2, 20,
+        3, 21,
+
+        4, 22,
+        5, 23,
+        6, 24,
+
+
+        7, 25,
+        8, 26,
+        9, 27,
+
+        10, 28,
+        11, 29,
+        12, 30,
+
+
+        13, 31,
+        14, 32,
+        15, 33,
+
+        16, 34,
+        17, 35,
+        18, 36
+    };
+
+    return StackTestHelper<ArmnnType, T, 4>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        3U,
+        inputData,
+        outputExpectedData
+    );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> Stack3dOutput1Axis3InputTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
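+    // Stacking three { 3, 3 } inputs along axis 1 gives a { 3, 3, 3 } output.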
+    armnn::TensorInfo inputTensorInfo ({ 3, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 3, 3, 3 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9
+    });
+
+    inputData.push_back(
+    {
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputData.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 2, 3,
+        10, 11, 12,
+        19, 20, 21,
+
+        4, 5, 6,
+        13, 14, 15,
+        22, 23, 24,
+
+        7, 8, 9,
+        16, 17, 18,
+        25, 26, 27
+    };
+
+    return StackTestHelper<ArmnnType, T, 3>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        1U,
+        inputData,
+        outputExpectedData
+    );
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> Stack5dOutputTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
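+    // Stacking two { 2, 2, 2, 3 } inputs along axis 1 gives a { 2, 2, 2, 2, 3 } output.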
+    armnn::TensorInfo inputTensorInfo ({ 2, 2, 2, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 2, 2, 2, 2, 3 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24
+    });
+
+    inputData.push_back(
+    {
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36,
+
+
+        37, 38, 39,
+        40, 41, 42,
+
+        43, 44, 45,
+        46, 47, 48
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36,
+
+
+
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+
+
+        37, 38, 39,
+        40, 41, 42,
+
+        43, 44, 45,
+        46, 47, 48
+
+    };
+
+    return StackTestHelper<ArmnnType, T, 5>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        1U,
+        inputData,
+        outputExpectedData
+    );
+}
@@ -2,18 +2,15 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
+
+#include "StridedSliceTestImpl.hpp"
 
 #include <ResolveType.hpp>
-#include "WorkloadTestUtils.hpp"
 
 #include <armnn/ArmNN.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/TypesUtils.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/IBackendInternal.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
 
@@ -73,7 +70,7 @@ LayerTestResult<T, OutDim> StridedSliceTestImpl(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> StridedSlice4DTest(
+LayerTestResult<T, 4> StridedSlice4dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -110,7 +107,7 @@ LayerTestResult<T, 4> StridedSlice4DTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 4> StridedSlice4DReverseTest(
+LayerTestResult<T, 4> StridedSlice4dReverseTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -268,7 +265,7 @@ LayerTestResult<T, 2> StridedSliceShrinkAxisMaskTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> StridedSlice3DTest(
+LayerTestResult<T, 3> StridedSlice3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -308,7 +305,7 @@ LayerTestResult<T, 3> StridedSlice3DTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 3> StridedSlice3DReverseTest(
+LayerTestResult<T, 3> StridedSlice3dReverseTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -347,7 +344,7 @@ LayerTestResult<T, 3> StridedSlice3DReverseTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> StridedSlice2DTest(
+LayerTestResult<T, 2> StridedSlice2dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -387,7 +384,7 @@ LayerTestResult<T, 2> StridedSlice2DTest(
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 2> StridedSlice2DReverseTest(
+LayerTestResult<T, 2> StridedSlice2dReverseTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
 {
@@ -428,3 +425,192 @@ LayerTestResult<T, 2> StridedSlice2DReverseTest(
 }
 
 } // anonymous namespace
+
+LayerTestResult<float, 4> StridedSlice4dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice4dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> StridedSlice4dReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice4dReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceSimpleStrideTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceSimpleRangeMaskTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceShrinkAxisMaskTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 3> StridedSlice3dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice3dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 3> StridedSlice3dReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice3dReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> StridedSlice2dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice2dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> StridedSlice2dReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice2dReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSlice4dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice4dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSlice4dReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice4dReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceSimpleStrideTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceSimpleRangeMaskTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceShrinkAxisMaskTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 3> StridedSlice3dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice3dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 3> StridedSlice3dReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice3dReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> StridedSlice2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice2dTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> StridedSlice2dReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice2dReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> StridedSlice4dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice4dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> StridedSlice4dReverseInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice4dReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> StridedSliceSimpleStrideInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceSimpleStrideTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 4> StridedSliceSimpleRangeMaskInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceSimpleRangeMaskTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 2> StridedSliceShrinkAxisMaskInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSliceShrinkAxisMaskTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 3> StridedSlice3dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice3dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 3> StridedSlice3dReverseInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice3dReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 2> StridedSlice2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice2dTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<int16_t, 2> StridedSlice2dReverseInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return StridedSlice2dReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/StridedSliceTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/StridedSliceTestImpl.hpp
new file mode 100644 (file)
index 0000000..1c83e3e
--- /dev/null
@@ -0,0 +1,119 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> StridedSlice4dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSlice4dReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> StridedSlice3dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> StridedSlice3dReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> StridedSlice2dFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> StridedSlice2dReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSlice4dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSlice4dReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> StridedSlice3dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> StridedSlice3dReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> StridedSlice2dUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> StridedSlice2dReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> StridedSlice4dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> StridedSlice4dReverseInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> StridedSliceSimpleStrideInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 4> StridedSliceSimpleRangeMaskInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> StridedSliceShrinkAxisMaskInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 3> StridedSlice3dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 3> StridedSlice3dReverseInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> StridedSlice2dInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int16_t, 2> StridedSlice2dReverseInt16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -2,16 +2,18 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include "QuantizeHelper.hpp"
+#pragma once
 
 #include <armnn/ArmNN.hpp>
 
+#include <Permute.hpp>
 #include <ResolveType.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
+
 #include <backendsCommon/test/CommonTestUtils.hpp>
+#include <backendsCommon/test/QuantizeHelper.hpp>
 #include <backendsCommon/test/TensorCopyUtils.hpp>
 #include <backendsCommon/test/WorkloadTestUtils.hpp>
 
index c33190f..8b71a46 100644 (file)
@@ -6,7 +6,7 @@
 #include <backendsCommon/test/EndToEndTestImpl.hpp>
 
 #include <backendsCommon/test/ArithmeticTestImpl.hpp>
-#include <backendsCommon/test/ConcatTestImpl.hpp>
+#include <backendsCommon/test/ConcatEndToEndTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
 #include <backendsCommon/test/PreluEndToEndTestImpl.hpp>
 #include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
@@ -265,4 +265,4 @@ BOOST_AUTO_TEST_CASE(ClQuantizedLstmEndToEndTest)
     QuantizedLstmEndToEnd(defaultBackends);
 }
 
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()
index 0dfcc22..3f7b282 100644 (file)
@@ -9,14 +9,12 @@
 #include "test/TensorHelpers.hpp"
 #include "test/UnitTests.hpp"
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <cl/ClLayerSupport.hpp>
 #include <cl/ClWorkloadFactory.hpp>
 #include <cl/workloads/ClWorkloadUtils.hpp>
+
 #include <backendsCommon/test/ActivationFixture.hpp>
 #include <backendsCommon/test/LayerTests.hpp>
-#include <backendsCommon/test/PermuteTestImpl.hpp>
-#include <backendsCommon/test/TransposeConvolution2dTestImpl.hpp>
 
 #include <arm_compute/core/CL/CLKernelLibrary.h>
 #include <arm_compute/runtime/CL/CLScheduler.h>
@@ -179,10 +177,10 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetricNhwc,
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul64, DepthwiseConvolution2dDepthMul64Test);
 
 // Splitter
-ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest)
+ARMNN_AUTO_TEST_CASE(SimpleSplitterFloat, SplitterFloatTest)
 ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test)
 
-ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest)
+ARMNN_AUTO_TEST_CASE(CopyViaSplitterFloat, CopyViaSplitterFloatTest)
 ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test)
 
 // Concat
@@ -289,8 +287,8 @@ ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadca
 ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest)
 
 // Batch Norm
-ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
-ARMNN_AUTO_TEST_CASE(BatchNormNhwc, BatchNormNhwcTest)
+ARMNN_AUTO_TEST_CASE(BatchNormFloat, BatchNormFloatTest)
+ARMNN_AUTO_TEST_CASE(BatchNormFloatNhwc, BatchNormFloatNhwcTest)
 
 // L2 Normalization
 ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest, DataLayout::NCHW)
@@ -313,46 +311,46 @@ ARMNN_AUTO_TEST_CASE(Constant, ConstantTest)
 ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8SimpleQuantizationScaleNoOffsetTest)
 
 // Concat
-ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest)
-ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test, false)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test, false)
-
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest, false)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test, false)
-
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim0, Concatenation4dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim1, Concatenation4dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim3, Concatenation4dDim3Test, false)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim0Uint8, Concatenation4dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim1Uint8, Concatenation4dDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim3Uint8, Concatenation4dDim3Uint8Test, false)
-
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim0, Concatenation4dDiffShapeDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim1, Concatenation4dDiffShapeDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim3, Concatenation4dDiffShapeDim3Test, false)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim0Uint8, Concatenation4dDiffShapeDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim1Uint8, Concatenation4dDiffShapeDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim3Uint8, Concatenation4dDiffShapeDim3Uint8Test, false)
+ARMNN_AUTO_TEST_CASE(Concat1d, Concat1dTest)
+ARMNN_AUTO_TEST_CASE(Concat1dUint8, Concat1dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat2dDim0, Concat2dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim0Uint8, Concat2dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1, Concat2dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1Uint8, Concat2dDim1Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDims, Concat2dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDimsUint8, Concat2dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDims, Concat2dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDimsUint8, Concat2dDim1DiffInputDimsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat3dDim0, Concat3dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim0Uint8, Concat3dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1, Concat3dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1Uint8, Concat3dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2, Concat3dDim2Test, false)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2Uint8, Concat3dDim2Uint8Test, false)
+
+ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDims, Concat3dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDimsUint8, Concat3dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDims, Concat3dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDimsUint8, Concat3dDim1DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDims, Concat3dDim2DiffInputDimsTest, false)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDimsUint8, Concat3dDim2DiffInputDimsUint8Test, false)
+
+ARMNN_AUTO_TEST_CASE(Concat4dDim0, Concat4dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim1, Concat4dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim3, Concat4dDim3Test, false)
+ARMNN_AUTO_TEST_CASE(Concat4dDim0Uint8, Concat4dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim1Uint8, Concat4dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim3Uint8, Concat4dDim3Uint8Test, false)
+
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0, Concat4dDiffShapeDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1, Concat4dDiffShapeDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3, Concat4dDiffShapeDim3Test, false)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0Uint8, Concat4dDiffShapeDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1Uint8, Concat4dDiffShapeDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3Uint8, Concat4dDiffShapeDim3Uint8Test, false)
 
 // Floor
 ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest<DataType::Float32>)
@@ -473,28 +471,28 @@ ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsUint8, SpaceToBatchNdMultiChanne
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockUint8, SpaceToBatchNdMultiBlockUint8Test)
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingUint8, SpaceToBatchNdPaddingUint8Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCFloat32, SpaceToBatchNdSimpleNHWCFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCFloat32, SpaceToBatchNdMultiChannelsNHWCFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCFloat32, SpaceToBatchNdMultiBlockNHWCFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCFloat32, SpaceToBatchNdPaddingNHWCFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcFloat32, SpaceToBatchNdSimpleNhwcFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcFloat32, SpaceToBatchNdMultiChannelsNhwcFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcFloat32, SpaceToBatchNdMultiBlockNhwcFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcFloat32, SpaceToBatchNdPaddingNhwcFloat32Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCUint8, SpaceToBatchNdSimpleNHWCUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCUint8, SpaceToBatchNdMultiChannelsNHWCUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCUint8, SpaceToBatchNdMultiBlockNHWCUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCUint8, SpaceToBatchNdPaddingNHWCUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcUint8, SpaceToBatchNdSimpleNhwcUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcUint8, SpaceToBatchNdMultiChannelsNhwcUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcUint8, SpaceToBatchNdMultiBlockNhwcUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcUint8, SpaceToBatchNdPaddingNhwcUint8Test)
 
 // Space To Depth
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWAsymmQ8, SpaceToDepthNCHWAsymmQ8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCAsymmQ8, SpaceToDepthNHWCAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcAsymmQ8, SpaceToDepthNhwcAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC1Float32, SpaceToDepthNHWCFloat32Test1)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW1Float32, SpaceToDepthNCHWFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc1Float32, SpaceToDepthNhwcFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw1Float32, SpaceToDepthNchwFloat32Test1)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC2Float32, SpaceToDepthNHWCFloat32Test2)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW2Float32, SpaceToDepthNCHWFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc2Float32, SpaceToDepthNhwcFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw2Float32, SpaceToDepthNchwFloat32Test2)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCQSymm16, SpaceToDepthNHWCQSymm16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWQSymm16, SpaceToDepthNCHWQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcQSymm16, SpaceToDepthNhwcQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwQSymm16, SpaceToDepthNchwQSymm16Test)
 
 // Stack
 ARMNN_AUTO_TEST_CASE(Stack0Axis,               Stack0AxisTest<DataType::Float32>)
@@ -505,25 +503,25 @@ ARMNN_AUTO_TEST_CASE(Stack3dOutput1Axis3Input, Stack3dOutput1Axis3InputTest<Data
 ARMNN_AUTO_TEST_CASE(Stack5dOutput,            Stack5dOutputTest<DataType::Float32>)
 
 // Strided Slice
-ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseFloat32, StridedSlice4DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dFloat32, StridedSlice4dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseFloat32, StridedSlice4dReverseFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideFloat32, StridedSliceSimpleStrideFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskFloat32, StridedSliceSimpleRangeMaskFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskFloat32, StridedSliceShrinkAxisMaskFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DFloat32, StridedSlice3DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseFloat32, StridedSlice3DReverseFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DFloat32, StridedSlice2DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseFloat32, StridedSlice2DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dFloat32, StridedSlice3dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseFloat32, StridedSlice3dReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dFloat32, StridedSlice2dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseFloat32, StridedSlice2dReverseFloat32Test)
 
-ARMNN_AUTO_TEST_CASE(StridedSlice4DUint8, StridedSlice4DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseUint8, StridedSlice4DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dUint8, StridedSlice4dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseUint8, StridedSlice4dReverseUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideUint8, StridedSliceSimpleStrideUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskUint8, StridedSliceSimpleRangeMaskUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskUint8, StridedSliceShrinkAxisMaskUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DUint8, StridedSlice3DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseUint8, StridedSlice3DReverseUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DUint8, StridedSlice2DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseUint8, StridedSlice2DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dUint8, StridedSlice3dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseUint8, StridedSlice3dReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dUint8, StridedSlice2dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseUint8, StridedSlice2dReverseUint8Test)
 
 // Resize Bilinear - NCHW
 ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear,
index 54fcfd3..33c377f 100644 (file)
@@ -6,7 +6,7 @@
 #include <backendsCommon/test/EndToEndTestImpl.hpp>
 
 #include <backendsCommon/test/ArithmeticTestImpl.hpp>
-#include <backendsCommon/test/ConcatTestImpl.hpp>
+#include <backendsCommon/test/ConcatEndToEndTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
 #include <backendsCommon/test/PreluEndToEndTestImpl.hpp>
 #include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
index fbd8c92..e0f2e84 100644 (file)
@@ -8,16 +8,13 @@
 #include <test/TensorHelpers.hpp>
 #include <test/UnitTests.hpp>
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonLayerSupport.hpp>
 #include <neon/NeonWorkloadFactory.hpp>
+
 #include <reference/RefWorkloadFactory.hpp>
+
 #include <backendsCommon/test/ActivationFixture.hpp>
 #include <backendsCommon/test/LayerTests.hpp>
-#include <backendsCommon/test/PermuteTestImpl.hpp>
-#include <backendsCommon/test/TensorCopyUtils.hpp>
-#include <backendsCommon/test/TransposeConvolution2dTestImpl.hpp>
-#include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <boost/test/unit_test.hpp>
 
@@ -370,23 +367,23 @@ ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxBeta1, Simple4dSoftmaxTest, 1.0f)
 ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxBeta1Uint8, Simple4dSoftmaxUint8Test, 1.0f)
 
 // SpaceToDepth
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWAsymmQ8, SpaceToDepthNCHWAsymmQ8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCAsymmQ8, SpaceToDepthNHWCAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcAsymmQ8, SpaceToDepthNhwcAsymmQ8Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC1Float32, SpaceToDepthNHWCFloat32Test1)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW1Float32, SpaceToDepthNCHWFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc1Float32, SpaceToDepthNhwcFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw1Float32, SpaceToDepthNchwFloat32Test1)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC2Float32, SpaceToDepthNHWCFloat32Test2)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW2Float32, SpaceToDepthNCHWFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc2Float32, SpaceToDepthNhwcFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw2Float32, SpaceToDepthNchwFloat32Test2)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCQSymm16, SpaceToDepthNHWCQSymm16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWQSymm16, SpaceToDepthNCHWQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcQSymm16, SpaceToDepthNhwcQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwQSymm16, SpaceToDepthNchwQSymm16Test)
 
 // Splitter
-ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest)
+ARMNN_AUTO_TEST_CASE(SimpleSplitterFloat, SplitterFloatTest)
 ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test)
 
-ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest)
+ARMNN_AUTO_TEST_CASE(CopyViaSplitterFloat, CopyViaSplitterFloatTest)
 ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test)
 
 // Concat
@@ -428,54 +425,54 @@ ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadca
 ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest)
 
 // Batch Norm
-ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
-ARMNN_AUTO_TEST_CASE(BatchNormNhwc, BatchNormNhwcTest)
+ARMNN_AUTO_TEST_CASE(BatchNormFloat, BatchNormFloatTest)
+ARMNN_AUTO_TEST_CASE(BatchNormFloatNhwc, BatchNormFloatNhwcTest)
 
 // Constant
 ARMNN_AUTO_TEST_CASE(Constant, ConstantTest)
 ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8SimpleQuantizationScaleNoOffsetTest)
 
-// Concatenation
-ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest)
-ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test, false)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test, false)
-
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest, false)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test, false)
-
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim0, Concatenation4dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim1, Concatenation4dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim3, Concatenation4dDim3Test, false)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim0Uint8, Concatenation4dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim1Uint8, Concatenation4dDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim3Uint8, Concatenation4dDim3Uint8Test, false)
-
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim0, Concatenation4dDiffShapeDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim1, Concatenation4dDiffShapeDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim3, Concatenation4dDiffShapeDim3Test, false)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim0Uint8, Concatenation4dDiffShapeDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim1Uint8, Concatenation4dDiffShapeDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim3Uint8, Concatenation4dDiffShapeDim3Uint8Test, false)
+// Concat
+ARMNN_AUTO_TEST_CASE(Concat1d, Concat1dTest)
+ARMNN_AUTO_TEST_CASE(Concat1dUint8, Concat1dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat2dDim0, Concat2dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim0Uint8, Concat2dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1, Concat2dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1Uint8, Concat2dDim1Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDims, Concat2dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDimsUint8, Concat2dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDims, Concat2dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDimsUint8, Concat2dDim1DiffInputDimsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat3dDim0, Concat3dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim0Uint8, Concat3dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1, Concat3dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1Uint8, Concat3dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2, Concat3dDim2Test, false)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2Uint8, Concat3dDim2Uint8Test, false)
+
+ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDims, Concat3dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDimsUint8, Concat3dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDims, Concat3dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDimsUint8, Concat3dDim1DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDims, Concat3dDim2DiffInputDimsTest, false)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDimsUint8, Concat3dDim2DiffInputDimsUint8Test, false)
+
+ARMNN_AUTO_TEST_CASE(Concat4dDim0, Concat4dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim1, Concat4dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim3, Concat4dDim3Test, false)
+ARMNN_AUTO_TEST_CASE(Concat4dDim0Uint8, Concat4dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim1Uint8, Concat4dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim3Uint8, Concat4dDim3Uint8Test, false)
+
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0, Concat4dDiffShapeDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1, Concat4dDiffShapeDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3, Concat4dDiffShapeDim3Test, false)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0Uint8, Concat4dDiffShapeDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1Uint8, Concat4dDiffShapeDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3Uint8, Concat4dDiffShapeDim3Uint8Test, false)
 // L2 Normalization
 ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest, DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest, DataLayout::NCHW)
@@ -704,25 +701,25 @@ ARMNN_AUTO_TEST_CASE(ResizeNearestNeighborMagUint8Nhwc,
                      DataLayout::NHWC, 0.1f, 50, 0.1f, 50)
 
 // Strided Slice
-ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseFloat32, StridedSlice4DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dFloat32, StridedSlice4dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseFloat32, StridedSlice4dReverseFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideFloat32, StridedSliceSimpleStrideFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskFloat32, StridedSliceSimpleRangeMaskFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskFloat32, StridedSliceShrinkAxisMaskFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DFloat32, StridedSlice3DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseFloat32, StridedSlice3DReverseFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DFloat32, StridedSlice2DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseFloat32, StridedSlice2DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dFloat32, StridedSlice3dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseFloat32, StridedSlice3dReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dFloat32, StridedSlice2dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseFloat32, StridedSlice2dReverseFloat32Test)
 
-ARMNN_AUTO_TEST_CASE(StridedSlice4DUint8, StridedSlice4DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseUint8, StridedSlice4DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dUint8, StridedSlice4dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseUint8, StridedSlice4dReverseUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideUint8, StridedSliceSimpleStrideUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskUint8, StridedSliceSimpleRangeMaskUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskUint8, StridedSliceShrinkAxisMaskUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DUint8, StridedSlice3DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseUint8, StridedSlice3DReverseUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DUint8, StridedSlice2DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseUint8, StridedSlice2DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dUint8, StridedSlice3dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseUint8, StridedSlice3dReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dUint8, StridedSlice2dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseUint8, StridedSlice2dReverseUint8Test)
 
 // Quantize
 ARMNN_AUTO_TEST_CASE(QuantizeSimpleUint8, QuantizeSimpleUint8Test)
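
Note: in the suites above, ARMNN_AUTO_TEST_CASE registers a Boost.Test case that runs the named test function against the suite's backend; any trailing macro arguments (such as the false/true flag on the Concat3dDim2 cases, which this commit shows set differently per backend) are forwarded to the test function unchanged. A minimal sketch of the expansion follows; the helper names (FactoryType, RunTestFunction) are assumptions modelled on the test framework, not the verbatim macro from UnitTests.hpp.

    // Sketch only: FactoryType stands for the per-suite workload factory
    // typedef, and RunTestFunction is assumed to build that factory, call
    // the test function with it, and compare the result to the reference.
    #define ARMNN_AUTO_TEST_CASE(TestName, TestFunction, ...)        \
        BOOST_AUTO_TEST_CASE(TestName)                               \
        {                                                            \
            RunTestFunction<FactoryType>(#TestName, &TestFunction,   \
                                         ##__VA_ARGS__);             \
        }
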
index ee42c9e..8cca3bc 100644
@@ -7,9 +7,9 @@
 
 #include <backendsCommon/test/ArithmeticTestImpl.hpp>
 #include <backendsCommon/test/BatchToSpaceNdEndToEndTestImpl.hpp>
-#include <backendsCommon/test/ConcatTestImpl.hpp>
+#include <backendsCommon/test/ConcatEndToEndTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
-#include <backendsCommon/test/DetectionPostProcessTestImpl.hpp>
+#include <backendsCommon/test/DetectionPostProcessEndToEndTestImpl.hpp>
 #include <backendsCommon/test/GatherEndToEndTestImpl.hpp>
 #include <backendsCommon/test/PreluEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ResizeEndToEndTestImpl.hpp>
@@ -965,4 +965,4 @@ BOOST_AUTO_TEST_CASE(RefImportAlignedPointerTest)
 
 #endif
 
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()
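
Note: the include renames above keep the end-to-end implementations (which drive a whole network through the public runtime API) clearly apart from the per-layer workload tests aggregated by LayerTests.hpp. For reference, a hedged sketch of the flow an *EndToEndTestImpl.hpp header exercises; the network here is a trivial ReLU stand-in, not the repo's actual Concat or DetectionPostProcess fixture.

    // Minimal end-to-end sketch: build a network, optimise it for the
    // backends under test, load it into the runtime, run one inference.
    #include <armnn/ArmNN.hpp>
    #include <vector>

    void EndToEndSketch(const std::vector<armnn::BackendId>& backends)
    {
        using namespace armnn;

        // Build: input -> ReLU -> output.
        INetworkPtr net = INetwork::Create();
        ActivationDescriptor reluDesc;
        reluDesc.m_Function = ActivationFunction::ReLu;

        IConnectableLayer* input  = net->AddInputLayer(0);
        IConnectableLayer* relu   = net->AddActivationLayer(reluDesc, "relu");
        IConnectableLayer* output = net->AddOutputLayer(0);

        input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
        relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

        TensorInfo info(TensorShape({ 1, 3 }), DataType::Float32);
        input->GetOutputSlot(0).SetTensorInfo(info);
        relu->GetOutputSlot(0).SetTensorInfo(info);

        // Optimise for the requested backends and load into the runtime.
        IRuntime::CreationOptions options;
        IRuntimePtr runtime = IRuntime::Create(options);
        IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

        NetworkId netId = 0;
        runtime->LoadNetwork(netId, std::move(optNet));

        // Run a single inference and read the result back.
        std::vector<float> inData  { -1.0f, 0.0f, 2.0f };
        std::vector<float> outData (3);
        InputTensors  in  { { 0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inData.data()) } };
        OutputTensors out { { 0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outData.data()) } };
        runtime->EnqueueWorkload(netId, in, out);  // outData should now be { 0, 0, 2 }
    }

Calling this with { armnn::Compute::CpuRef } mirrors the shape of what the reference end-to-end suite does with its real fixtures.
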
index 3adedff..59ca516 100644
@@ -5,16 +5,12 @@
 
 #include "RefWorkloadFactoryHelper.hpp"
 
-#include <test/TensorHelpers.hpp>
-#include <test/UnitTests.hpp>
+#include <backendsCommon/test/LayerTests.hpp>
 
 #include <reference/RefWorkloadFactory.hpp>
 
-#include <backendsCommon/test/DebugTestImpl.hpp>
-#include <backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp>
-#include <backendsCommon/test/LayerTests.hpp>
-#include <backendsCommon/test/PermuteTestImpl.hpp>
-#include <backendsCommon/test/TransposeConvolution2dTestImpl.hpp>
+#include <test/TensorHelpers.hpp>
+#include <test/UnitTests.hpp>
 
 #include <boost/test/unit_test.hpp>
 
@@ -456,11 +452,11 @@ ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false)
 ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true)
 
 // Splitter
-ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest)
+ARMNN_AUTO_TEST_CASE(SimpleSplitterFloat, SplitterFloatTest)
 ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test)
 ARMNN_AUTO_TEST_CASE(SimpleSplitterInt16, SplitterInt16Test)
 
-ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest)
+ARMNN_AUTO_TEST_CASE(CopyViaSplitterFloat, CopyViaSplitterFloatTest)
 ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test)
 ARMNN_AUTO_TEST_CASE(CopyViaSplitterInt16, CopyViaSplitterInt16Test)
 
@@ -564,8 +560,8 @@ ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorInt16, MultiplicationBroadca
 ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest)
 
 // Batch Norm
-ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
-ARMNN_AUTO_TEST_CASE(BatchNormNhwc, BatchNormNhwcTest)
+ARMNN_AUTO_TEST_CASE(BatchNormFloat, BatchNormFloatTest)
+ARMNN_AUTO_TEST_CASE(BatchNormFloatNhwc, BatchNormFloatNhwcTest)
 ARMNN_AUTO_TEST_CASE(BatchNormUint8, BatchNormUint8Test)
 ARMNN_AUTO_TEST_CASE(BatchNormUint8Nhwc, BatchNormUint8NhwcTest)
 ARMNN_AUTO_TEST_CASE(BatchNormInt16, BatchNormInt16Test)
@@ -820,50 +816,50 @@ ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8CustomQuantizationScaleAndOffse
 ARMNN_AUTO_TEST_CASE(ConstantInt16, ConstantInt16CustomQuantizationScaleAndOffsetTest)
 
 // Concat
-ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest)
-ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test, true)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test, true)
-
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest, true)
-ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test, true)
-
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim0, Concatenation4dDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim1, Concatenation4dDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim2, Concatenation4dDim2Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim3, Concatenation4dDim3Test, true)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim0Uint8, Concatenation4dDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim1Uint8, Concatenation4dDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim2Uint8, Concatenation4dDim2Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDim3Uint8, Concatenation4dDim3Uint8Test, true)
-
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim0, Concatenation4dDiffShapeDim0Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim1, Concatenation4dDiffShapeDim1Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim2, Concatenation4dDiffShapeDim2Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim3, Concatenation4dDiffShapeDim3Test, true)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim0Uint8, Concatenation4dDiffShapeDim0Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim1Uint8, Concatenation4dDiffShapeDim1Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim2Uint8, Concatenation4dDiffShapeDim2Uint8Test)
-ARMNN_AUTO_TEST_CASE(Concatenation4dDiffShapeDim3Uint8, Concatenation4dDiffShapeDim3Uint8Test, true)
+ARMNN_AUTO_TEST_CASE(Concat1d, Concat1dTest)
+ARMNN_AUTO_TEST_CASE(Concat1dUint8, Concat1dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat2dDim0, Concat2dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim0Uint8, Concat2dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1, Concat2dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1Uint8, Concat2dDim1Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDims, Concat2dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat2dDim0DiffInputDimsUint8, Concat2dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDims, Concat2dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat2dDim1DiffInputDimsUint8, Concat2dDim1DiffInputDimsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concat3dDim0, Concat3dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim0Uint8, Concat3dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1, Concat3dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1Uint8, Concat3dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2, Concat3dDim2Test, true)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2Uint8, Concat3dDim2Uint8Test, true)
+
+ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDims, Concat3dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat3dDim0DiffInputDimsUint8, Concat3dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDims, Concat3dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concat3dDim1DiffInputDimsUint8, Concat3dDim1DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDims, Concat3dDim2DiffInputDimsTest, true)
+ARMNN_AUTO_TEST_CASE(Concat3dDim2DiffInputDimsUint8, Concat3dDim2DiffInputDimsUint8Test, true)
+
+ARMNN_AUTO_TEST_CASE(Concat4dDim0, Concat4dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim1, Concat4dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim2, Concat4dDim2Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim3, Concat4dDim3Test, true)
+ARMNN_AUTO_TEST_CASE(Concat4dDim0Uint8, Concat4dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim1Uint8, Concat4dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim2Uint8, Concat4dDim2Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDim3Uint8, Concat4dDim3Uint8Test, true)
+
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0, Concat4dDiffShapeDim0Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1, Concat4dDiffShapeDim1Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim2, Concat4dDiffShapeDim2Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3, Concat4dDiffShapeDim3Test, true)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim0Uint8, Concat4dDiffShapeDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim1Uint8, Concat4dDiffShapeDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim2Uint8, Concat4dDiffShapeDim2Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concat4dDiffShapeDim3Uint8, Concat4dDiffShapeDim3Uint8Test, true)
 
 // Floor
 ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest<DataType::Float32>)
@@ -973,25 +969,25 @@ ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsUint8, SpaceToBatchNdMultiChanne
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockUint8, SpaceToBatchNdMultiBlockUint8Test)
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingUint8, SpaceToBatchNdPaddingUint8Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCFloat32, SpaceToBatchNdSimpleNHWCFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCFloat32, SpaceToBatchNdMultiChannelsNHWCFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCFloat32, SpaceToBatchNdMultiBlockNHWCFloat32Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCFloat32, SpaceToBatchNdPaddingNHWCFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcFloat32, SpaceToBatchNdSimpleNhwcFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcFloat32, SpaceToBatchNdMultiChannelsNhwcFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcFloat32, SpaceToBatchNdMultiBlockNhwcFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcFloat32, SpaceToBatchNdPaddingNhwcFloat32Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCUint8, SpaceToBatchNdSimpleNHWCUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCUint8, SpaceToBatchNdMultiChannelsNHWCUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCUint8, SpaceToBatchNdMultiBlockNHWCUint8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCUint8, SpaceToBatchNdPaddingNHWCUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcUint8, SpaceToBatchNdSimpleNhwcUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcUint8, SpaceToBatchNdMultiChannelsNhwcUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcUint8, SpaceToBatchNdMultiBlockNhwcUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcUint8, SpaceToBatchNdPaddingNhwcUint8Test)
 
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleUint16, SpaceToBatchNdSimpleUint16Test)
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsUint16, SpaceToBatchNdMultiChannelsUint16Test)
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockUint16, SpaceToBatchNdMultiBlockUint16Test)
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingUint16, SpaceToBatchNdPaddingUint16Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCUint16, SpaceToBatchNdSimpleNHWCUint16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCUint16, SpaceToBatchNdMultiChannelsNHWCUint16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCUint16, SpaceToBatchNdMultiBlockNHWCUint16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCUint16, SpaceToBatchNdPaddingNHWCUint16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNhwcUint16, SpaceToBatchNdSimpleNhwcUint16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNhwcUint16, SpaceToBatchNdMultiChannelsNhwcUint16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNhwcUint16, SpaceToBatchNdMultiBlockNhwcUint16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNhwcUint16, SpaceToBatchNdPaddingNhwcUint16Test)
 
 // BatchToSpace
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat1, BatchToSpaceNdNhwcTest1<DataType::Float32>)
@@ -1044,69 +1040,69 @@ ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwQsymm16_7,  BatchToSpaceNdNchwTest7<DataT
 
 
 // SpaceToDepth
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWAsymmQ8, SpaceToDepthNCHWAsymmQ8Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCAsymmQ8, SpaceToDepthNHWCAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcAsymmQ8, SpaceToDepthNhwcAsymmQ8Test)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC1Float32, SpaceToDepthNHWCFloat32Test1)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW1Float32, SpaceToDepthNCHWFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc1Float32, SpaceToDepthNhwcFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw1Float32, SpaceToDepthNchwFloat32Test1)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC2Float32, SpaceToDepthNHWCFloat32Test2)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW2Float32, SpaceToDepthNCHWFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwc2Float32, SpaceToDepthNhwcFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchw2Float32, SpaceToDepthNchwFloat32Test2)
 
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCQSymm16, SpaceToDepthNHWCQSymm16Test)
-ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWQSymm16, SpaceToDepthNCHWQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNhwcQSymm16, SpaceToDepthNhwcQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwQSymm16, SpaceToDepthNchwQSymm16Test)
 
 // Strided Slice
-ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseFloat32, StridedSlice4DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dFloat32, StridedSlice4dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseFloat32, StridedSlice4dReverseFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideFloat32, StridedSliceSimpleStrideFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskFloat32, StridedSliceSimpleRangeMaskFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskFloat32, StridedSliceShrinkAxisMaskFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DFloat32, StridedSlice3DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseFloat32, StridedSlice3DReverseFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DFloat32, StridedSlice2DFloat32Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseFloat32, StridedSlice2DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dFloat32, StridedSlice3dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseFloat32, StridedSlice3dReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dFloat32, StridedSlice2dFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseFloat32, StridedSlice2dReverseFloat32Test)
 
-ARMNN_AUTO_TEST_CASE(StridedSlice4DUint8, StridedSlice4DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseUint8, StridedSlice4DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dUint8, StridedSlice4dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseUint8, StridedSlice4dReverseUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideUint8, StridedSliceSimpleStrideUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskUint8, StridedSliceSimpleRangeMaskUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskUint8, StridedSliceShrinkAxisMaskUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DUint8, StridedSlice3DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseUint8, StridedSlice3DReverseUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DUint8, StridedSlice2DUint8Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseUint8, StridedSlice2DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dUint8, StridedSlice3dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseUint8, StridedSlice3dReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dUint8, StridedSlice2dUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseUint8, StridedSlice2dReverseUint8Test)
 
-ARMNN_AUTO_TEST_CASE(StridedSlice4DInt16, StridedSlice4DInt16Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseInt16, StridedSlice4DReverseInt16Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dInt16, StridedSlice4dInt16Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4dReverseInt16, StridedSlice4dReverseInt16Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideInt16, StridedSliceSimpleStrideInt16Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskInt16, StridedSliceSimpleRangeMaskInt16Test)
 ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskInt16, StridedSliceShrinkAxisMaskInt16Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DInt16, StridedSlice3DInt16Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseInt16, StridedSlice3DReverseInt16Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DInt16, StridedSlice2DInt16Test)
-ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseInt16, StridedSlice2DReverseInt16Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dInt16, StridedSlice3dInt16Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3dReverseInt16, StridedSlice3dReverseInt16Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dInt16, StridedSlice2dInt16Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2dReverseInt16, StridedSlice2dReverseInt16Test)
 
 // Debug
-ARMNN_AUTO_TEST_CASE(Debug4DFloat32, Debug4DFloat32Test)
-ARMNN_AUTO_TEST_CASE(Debug3DFloat32, Debug3DFloat32Test)
-ARMNN_AUTO_TEST_CASE(Debug2DFloat32, Debug2DFloat32Test)
-ARMNN_AUTO_TEST_CASE(Debug1DFloat32, Debug1DFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug4dFloat32, Debug4dFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug3dFloat32, Debug3dFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug2dFloat32, Debug2dFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug1dFloat32, Debug1dFloat32Test)
 
-ARMNN_AUTO_TEST_CASE(Debug4DUint8, Debug4DUint8Test)
-ARMNN_AUTO_TEST_CASE(Debug3DUint8, Debug3DUint8Test)
-ARMNN_AUTO_TEST_CASE(Debug2DUint8, Debug2DUint8Test)
-ARMNN_AUTO_TEST_CASE(Debug1DUint8, Debug1DUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug4dUint8, Debug4dUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug3dUint8, Debug3dUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug2dUint8, Debug2dUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug1dUint8, Debug1dUint8Test)
 
-ARMNN_AUTO_TEST_CASE(Debug4DQSymm16, Debug4DTest<DataType::QuantisedSymm16>)
-ARMNN_AUTO_TEST_CASE(Debug3DQSymm16, Debug3DTest<DataType::QuantisedSymm16>)
-ARMNN_AUTO_TEST_CASE(Debug2DQSymm16, Debug2DTest<DataType::QuantisedSymm16>)
-ARMNN_AUTO_TEST_CASE(Debug1DQSymm16, Debug1DTest<DataType::QuantisedSymm16>)
+ARMNN_AUTO_TEST_CASE(Debug4dQSymm16, Debug4dInt16Test)
+ARMNN_AUTO_TEST_CASE(Debug3dQSymm16, Debug3dInt16Test)
+ARMNN_AUTO_TEST_CASE(Debug2dQSymm16, Debug2dInt16Test)
+ARMNN_AUTO_TEST_CASE(Debug1dQSymm16, Debug1dInt16Test)
 
 // Gather
-ARMNN_AUTO_TEST_CASE(Gather1DParamsFloat, Gather1DParamsFloatTest)
-ARMNN_AUTO_TEST_CASE(Gather1DParamsUint8, Gather1DParamsUint8Test)
-ARMNN_AUTO_TEST_CASE(Gather1DParamsInt16, Gather1DParamsInt16Test)
+ARMNN_AUTO_TEST_CASE(Gather1dParamsFloat, Gather1dParamsFloatTest)
+ARMNN_AUTO_TEST_CASE(Gather1dParamsUint8, Gather1dParamsUint8Test)
+ARMNN_AUTO_TEST_CASE(Gather1dParamsInt16, Gather1dParamsInt16Test)
 ARMNN_AUTO_TEST_CASE(GatherMultiDimParamsFloat, GatherMultiDimParamsFloatTest)
 ARMNN_AUTO_TEST_CASE(GatherMultiDimParamsUint8, GatherMultiDimParamsUint8Test)
 ARMNN_AUTO_TEST_CASE(GatherMultiDimParamsInt16, GatherMultiDimParamsInt16Test)
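
Note: renames such as Debug4DTest<DataType::QuantisedSymm16> -> Debug4dInt16Test (and likewise BatchNormTest -> BatchNormFloatTest, SplitterTest -> SplitterFloatTest) follow from moving the test implementations out of headers: once the template definition lives in a .cpp file, the suites call a concretely named wrapper declared in the impl header instead of instantiating the template at the macro site. A hedged sketch of that pattern, with the wrapper signature assumed from how the macros above invoke it:

    // DebugTestImpl.hpp (sketch): declaration only, so including suites
    // no longer need the template definition.
    LayerTestResult<int16_t, 4> Debug4dInt16Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);

    // DebugTestImpl.cpp (sketch): the template stays internal and the
    // named wrapper pins down its type parameter.
    LayerTestResult<int16_t, 4> Debug4dInt16Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
    {
        return Debug4dTest<armnn::DataType::QuantisedSymm16>(
            workloadFactory, memoryManager);
    }
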