IVGCVSW-3025: Refactor reference Convolution2d workload
author     Mike Kelly <mike.kelly@arm.com>
           Wed, 22 May 2019 16:21:49 +0000 (17:21 +0100)
committer  Ruomei Yan <ruomei.yan@arm.com>
           Thu, 23 May 2019 09:23:19 +0000 (09:23 +0000)
 * Refactored RefConvolution2dWorkload to support all DataTypes through Encoders and Decoders.
 * Added a Convolve function to ConvImpl that uses Encoders and Decoders to support all DataTypes.
 * Deleted RefConvolution2dFloat32Workload and RefConvolution2dUint8Workload.
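
In outline (a simplified sketch, not the exact workload code; inputInfo, inputData,
outputInfo, outputData and index are placeholders): element access now goes through
the Decoder/Encoder iterators built by MakeDecoder/MakeEncoder from a TensorInfo, so
a single kernel body serves Float32, QAsymm8 and Signed32 alike.

    std::unique_ptr<Decoder<float>> input  = MakeDecoder<float>(inputInfo,  inputData);
    std::unique_ptr<Encoder<float>> output = MakeEncoder<float>(outputInfo, outputData);

    *input  += index;                 // step both iterators to the element of interest
    *output += index;
    output->Set(input->Get());        // dequantize on read, requantize on write
    *input  -= index;                 // step back, as Convolve does after each access
    *output -= index;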

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ic5ef0f499d08b948fa65fdee54b5f681fd0b1c05

18 files changed:
src/backends/backendsCommon/test/Conv2dTestImpl.hpp
src/backends/backendsCommon/test/LayerTests.cpp
src/backends/backendsCommon/test/WorkloadTestUtils.hpp
src/backends/reference/RefWorkloadFactory.cpp
src/backends/reference/backend.mk
src/backends/reference/test/RefCreateWorkloadTests.cpp
src/backends/reference/workloads/BaseIterator.hpp
src/backends/reference/workloads/CMakeLists.txt
src/backends/reference/workloads/ConvImpl.cpp
src/backends/reference/workloads/ConvImpl.hpp
src/backends/reference/workloads/Decoders.hpp
src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp [deleted file]
src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp [deleted file]
src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp [deleted file]
src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp [deleted file]
src/backends/reference/workloads/RefConvolution2dWorkload.cpp [new file with mode: 0644]
src/backends/reference/workloads/RefConvolution2dWorkload.hpp [new file with mode: 0644]
src/backends/reference/workloads/RefWorkloads.hpp

index bb5656b..0533c77 100644 (file)
@@ -1125,6 +1125,7 @@ LayerTestResult<T,4> CompareConvolution2dTestImpl(
 
     ExecuteWorkload(*workload, memoryManager);
 
+    workloadRef->PostAllocationConfigure();
     workloadRef->Execute();
 
     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
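
The extra PostAllocationConfigure call reflects what the refactored workload needs:
its input and output Decoders/Encoders are created from mapped tensor handles, which
is only valid once the backing memory has been allocated. Tests must therefore follow
this order (a sketch; workloadRef is any IWorkload):

    workloadRef->PostAllocationConfigure();   // map I/O tensors, build Decoder/Encoder objects
    workloadRef->Execute();                   // run the kernel through the prepared iterators
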
index 5d05959..c94dc53 100644 (file)
@@ -79,12 +79,12 @@ static std::vector<float> Bias2({0, 2});
 
 // Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled.
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset)
+boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
 {
     if(biasEnabled)
     {
         armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
-        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2));
+        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias2));
         return bias;
     }
     else
@@ -170,7 +170,7 @@ LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale, qOffset),
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
@@ -247,7 +247,7 @@ LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale, qOffset),
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
@@ -494,7 +494,7 @@ LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(false, qScale, qOffset),
+        GetBias2<ArmnnBType>(false, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
@@ -552,7 +552,7 @@ LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(false, qScale, qOffset),
+        GetBias2<ArmnnBType>(false, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
@@ -627,7 +627,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale, qOffset),
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
@@ -736,7 +736,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale, qOffset),
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
@@ -802,7 +802,7 @@ LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
         memoryManager,
         input,
         kernel,
-        GetBias2<ArmnnBType>(biasEnabled, qScale, qOffset),
+        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
         expectedOutput,
         qScale,
         qOffset,
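
Dropping the offset argument and passing qScale * qScale follows the usual quantization
scheme for convolution bias: the bias is stored as int32 with zero point 0 and a scale
equal to inputScale * weightScale, which in these tests are both qScale. A standalone
sketch of that arithmetic (values chosen arbitrarily):

    #include <cmath>
    #include <cstdint>

    int main()
    {
        const float qScale    = 0.5f;               // used for both input and weights here
        const float biasScale = qScale * qScale;    // inputScale * weightScale
        const float realBias  = 2.0f;               // an element of Bias2

        // Signed32 bias has no offset, so quantization is a plain division by the scale.
        const int32_t quantizedBias = static_cast<int32_t>(std::round(realBias / biasScale));
        return quantizedBias == 8 ? 0 : 1;          // 2.0f / 0.25f == 8
    }
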
index d03c5a9..212fea3 100644 (file)
@@ -73,6 +73,9 @@ inline void ExecuteWorkload(armnn::IWorkload& workload,
         memoryManager->Acquire();
     }
 
+    // Perform PostAllocationConfiguration
+    workload.PostAllocationConfigure();
+
     // Execute the workload
     workload.Execute();
 
index 6544856..be92094 100644 (file)
@@ -160,7 +160,7 @@ std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreatePooling2d(const Pool
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateConvolution2d(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<RefConvolution2dFloat32Workload, RefConvolution2dUint8Workload>(descriptor, info);
+    return std::make_unique<RefConvolution2dWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d(
index 1d635aa..3146fca 100644 (file)
@@ -33,8 +33,7 @@ BACKEND_SOURCES := \
         workloads/RefConstantWorkload.cpp \
         workloads/RefConvertFp16ToFp32Workload.cpp \
         workloads/RefConvertFp32ToFp16Workload.cpp \
-        workloads/RefConvolution2dFloat32Workload.cpp \
-        workloads/RefConvolution2dUint8Workload.cpp \
+        workloads/RefConvolution2dWorkload.cpp \
         workloads/RefDebugWorkload.cpp \
         workloads/RefDepthwiseConvolution2dFloat32Workload.cpp \
         workloads/RefDepthwiseConvolution2dUint8Workload.cpp \
index 4827d28..62e8788 100644 (file)
@@ -254,7 +254,7 @@ static void RefCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayou
 {
     Graph graph;
     RefWorkloadFactory factory;
-    auto workload = CreateConvolution2dWorkloadTest<RefConvolution2dFloat32Workload, DataType::Float32>
+    auto workload = CreateConvolution2dWorkloadTest<RefConvolution2dWorkload, DataType::Float32>
                     (factory, graph, dataLayout);
 
     std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW) ?
index 97af95a..ab6de2b 100644 (file)
@@ -123,6 +123,21 @@ public:
     }
 };
 
+class ScaledInt32Decoder : public TypedIterator<const int32_t, Decoder<float>>
+{
+public:
+    ScaledInt32Decoder(const int32_t* data, const float scale)
+        : TypedIterator(data), m_Scale(scale) {}
+
+    float Get() const override
+    {
+        return static_cast<float>(*m_Iterator) * m_Scale;
+    }
+
+private:
+    const float m_Scale;
+};
+
 class QASymm8Encoder : public TypedIterator<uint8_t, Encoder<float>>
 {
 public:
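
ScaledInt32Decoder covers the Signed32 bias case: dequantization is a plain
multiplication by the stored scale, with no zero point. A minimal usage sketch
(data values invented for illustration):

    // 8 is 2.0f quantized with scale 0.25f (inputScale * weightScale).
    const int32_t biasData[] = { 8 };
    ScaledInt32Decoder biasDecoder(biasData, 0.25f);
    const float realBias = biasDecoder.Get();   // 8 * 0.25f == 2.0f
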
index 6e4d763..ab4fea6 100644 (file)
@@ -51,10 +51,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     RefConvertFp16ToFp32Workload.hpp
     RefConvertFp32ToFp16Workload.cpp
     RefConvertFp32ToFp16Workload.hpp
-    RefConvolution2dFloat32Workload.cpp
-    RefConvolution2dFloat32Workload.hpp
-    RefConvolution2dUint8Workload.cpp
-    RefConvolution2dUint8Workload.hpp
+    RefConvolution2dWorkload.cpp
+    RefConvolution2dWorkload.hpp
     RefElementwiseWorkload.cpp
     RefElementwiseWorkload.hpp
     RefDebugWorkload.cpp
index 8743a2b..6a5ac53 100644 (file)
@@ -68,4 +68,177 @@ int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int ex
     return (x >> exponent) + (remainder > threshold ? 1 : 0);
 }
 
+inline unsigned int GetOffset(DataLayout& dataLayout, const TensorShape& shape, unsigned int b, unsigned int c,
+                              unsigned int h, unsigned int w)
+{
+    switch (dataLayout)
+    {
+        case DataLayout::NHWC:
+            b *= shape[1] * shape[2] * shape[3];
+            h *= shape[2] * shape[3];
+            w *= shape[3];
+            break;
+        case DataLayout::NCHW:
+        default:
+            b *= shape[1] * shape[2] * shape[3];
+            c *= shape[2] * shape[3];
+            h *= shape[3];
+            break;
+    }
+    return b + c + h + w;
+}
+
+void Convolve(const TensorShape& rInputShape,
+              Decoder<float>& rInputDecoder,
+              const TensorShape& rOutputShape,
+              Encoder<float>& rOutputEncoder,
+              const TensorShape& rFilterShape,
+              Decoder<float>& rFilterDecoder,
+              bool biasEnabled,
+              Decoder<float>* pBiasDecoder,
+              DataLayout dataLayout,
+              unsigned int paddingTop,
+              unsigned int paddingLeft,
+              unsigned int xStride,
+              unsigned int yStride,
+              unsigned int xDilation,
+              unsigned int yDilation,
+              bool depthwise)
+{
+    if (biasEnabled && !pBiasDecoder)
+    {
+        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
+    }
+    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
+
+    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
+    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
+    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
+
+    unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
+    unsigned int inputChannels   = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
+    unsigned int outputChannels  = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
+
+    unsigned int batchSize    = rOutputShape[0];
+    unsigned int outputHeight = rOutputShape[heightIndex];
+    unsigned int outputWidth  = rOutputShape[widthIndex];
+    unsigned int inputHeight  = rInputShape[heightIndex];
+    unsigned int inputWidth   = rInputShape[widthIndex];
+
+    unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
+    unsigned int filterWidth  = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
+
+    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
+    {
+        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
+        {
+            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
+            {
+                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
+                {
+                    // This loop goes over each output element.
+                    float sum =  0.0f;
+
+                    // For depthwise, each output channel corresponds to exactly one input channel.
+                    // For normal, must loop over each input channel.
+                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
+                    {
+                        unsigned int depthwiseMultiplierIdx = 0;
+                        if (depthwise)
+                        {
+                            cInput = cOutput / depthMultiplier;
+                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
+                        }
+
+                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
+                        {
+                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
+                            {
+                                // This loop goes over each input element for each output element.
+                                unsigned int filterIndex = 0;
+
+                                // Since dimensionality of kernel depends on depthwiseness, so does index.
+                                if (depthwise)
+                                {
+                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
+                                                  cInput * filterWidth * filterHeight +
+                                                  yFilter * filterWidth +
+                                                  xFilter;
+                                }
+                                else
+                                {
+                                    if (dataLayout == DataLayout::NHWC)
+                                    {
+                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
+                                                      yFilter * filterWidth * inputChannels +
+                                                      xFilter * inputChannels +
+                                                      cInput;
+                                    }
+                                    else
+                                    {
+                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
+                                                      cInput  * filterWidth * filterHeight +
+                                                      yFilter * filterWidth +
+                                                      xFilter;
+                                    }
+                                }
+                                rFilterDecoder += filterIndex;
+                                float filterValue = rFilterDecoder.Get();
+                                rFilterDecoder -= filterIndex;
+
+                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
+                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;
+
+                                float inputValue;
+
+                                // Check if we're in the padding.
+                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
+                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
+                                {
+                                    inputValue = 0.0f;
+                                }
+                                else
+                                {
+                                    unsigned int inputIndex;
+
+                                    if (dataLayout == DataLayout::NHWC)
+                                    {
+                                        inputIndex = batchIdx * inputHeight * inputWidth  * inputChannels +
+                                                     (yInput - paddingTop) * inputWidth * inputChannels +
+                                                     (xInput - paddingLeft) * inputChannels +
+                                                     cInput;
+                                    }
+                                    else
+                                    {
+                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
+                                                     inputWidth * inputHeight * cInput +
+                                                     inputWidth * (yInput - paddingTop) +
+                                                     xInput - paddingLeft;
+                                    }
+                                    rInputDecoder += inputIndex;
+                                    inputValue = rInputDecoder.Get();
+                                    rInputDecoder -= inputIndex;
+                                }
+                                sum += filterValue * inputValue;
+                            }
+                        }
+                    }
+
+                    if (biasEnabled)
+                    {
+                        *pBiasDecoder += cOutput;
+                        sum += pBiasDecoder->Get();
+                        *pBiasDecoder -= cOutput;
+                    }
+                    unsigned int outIdx = GetOffset(dataLayout, rOutputShape, batchIdx, cOutput, yOutput, xOutput);
+
+                    rOutputEncoder += outIdx;
+                    rOutputEncoder.Set(sum);
+                    rOutputEncoder -= outIdx;
+                }
+            }
+        }
+    }
+}
+
 } //namespace armnn
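
As a usage sketch (illustrative only; RunTinyConvolution is a made-up name, and the
file is assumed to be compiled next to the reference workloads so the local headers
resolve): a 3x3 Float32 input convolved with a single 1x1 kernel of value 2, driven
through the same Decoder/Encoder plumbing the workload uses.

    #include "ConvImpl.hpp"
    #include "Decoders.hpp"
    #include "Encoders.hpp"

    using namespace armnn;

    void RunTinyConvolution()
    {
        const TensorShape inputShape ({1, 1, 3, 3});   // NCHW
        const TensorShape filterShape({1, 1, 1, 1});
        const TensorShape outputShape({1, 1, 3, 3});

        const TensorInfo inputInfo (inputShape,  DataType::Float32);
        const TensorInfo filterInfo(filterShape, DataType::Float32);
        const TensorInfo outputInfo(outputShape, DataType::Float32);

        const float inputData[9]  = {1, 2, 3, 4, 5, 6, 7, 8, 9};
        const float filterData[1] = {2.0f};
        float       outputData[9] = {};

        auto inputDecoder  = MakeDecoder<float>(inputInfo,  inputData);
        auto filterDecoder = MakeDecoder<float>(filterInfo, filterData);
        auto outputEncoder = MakeEncoder<float>(outputInfo, outputData);

        Convolve(inputShape, *inputDecoder, outputShape, *outputEncoder,
                 filterShape, *filterDecoder,
                 /*biasEnabled=*/false, /*pBiasDecoder=*/nullptr,
                 DataLayout::NCHW,
                 /*paddingTop=*/0, /*paddingLeft=*/0,
                 /*xStride=*/1, /*yStride=*/1,
                 /*xDilation=*/1, /*yDilation=*/1);

        // outputData now holds each input element multiplied by 2.
    }
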
index 23b402a..3551ba8 100644 (file)
@@ -7,6 +7,9 @@
 
 #include "RefWorkloadUtils.hpp"
 #include "TensorBufferArrayView.hpp"
+#include "BaseIterator.hpp"
+#include "Decoders.hpp"
+#include "Encoders.hpp"
 
 #include <armnn/Tensor.hpp>
 
@@ -224,4 +227,20 @@ static void ConvImpl(ConvData data,
     }
 }
 
+void Convolve(const TensorShape& rInputShape,
+              Decoder<float>& rInputDecoder,
+              const TensorShape& rOutputShape,
+              Encoder<float>& rOutputEncoder,
+              const TensorShape& rFilterShape,
+              Decoder<float>& rFilterDecoder,
+              bool biasEnabled,
+              Decoder<float>* pBiasDecoder,
+              DataLayout dataLayout,
+              unsigned int paddingTop,
+              unsigned int paddingLeft,
+              unsigned int xStride,
+              unsigned int yStride,
+              unsigned int xDilation,
+              unsigned int yDilation,
+              bool depthwise = false);
 } //namespace armnn
index acf20c4..57c19a2 100644 (file)
@@ -36,6 +36,12 @@ inline std::unique_ptr<Decoder<float>> MakeDecoder(const TensorInfo& info, const
         {
             return std::make_unique<FloatDecoder>(static_cast<const float*>(data));
         }
+        case armnn::DataType::Signed32:
+        {
+            return std::make_unique<ScaledInt32Decoder>(
+                    static_cast<const int32_t*>(data),
+                    info.GetQuantizationScale());
+        }
         default:
         {
             BOOST_ASSERT_MSG(false, "Not supported Data Type!");
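
With this case added, a Signed32 tensor is treated as quantized bias data and decoded
using its quantization scale alone (set by the caller to inputScale * weightScale).
A brief sketch with invented values, assuming Decoders.hpp and <armnn/Tensor.hpp> are
in scope:

    TensorInfo biasInfo({ 2 }, DataType::Signed32);
    biasInfo.SetQuantizationScale(0.25f);               // inputScale * weightScale

    const int32_t biasData[] = { 8, -4 };
    std::unique_ptr<Decoder<float>> biasDecoder = MakeDecoder<float>(biasInfo, biasData);
    const float firstBias = biasDecoder->Get();         // 8 * 0.25f == 2.0f
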
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
deleted file mode 100644 (file)
index 7b298df..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefConvolution2dFloat32Workload.hpp"
-
-#include "ConvImpl.hpp"
-#include "RefWorkloadUtils.hpp"
-
-#include "Profiling.hpp"
-
-namespace armnn
-{
-RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload(
-    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
-        : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info),
-          m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
-          m_Bias(descriptor.m_Parameters.m_BiasEnabled
-                 ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
-
-void RefConvolution2dFloat32Workload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute");
-
-    const float* inputData  = GetInputTensorDataFloat(0, m_Data);
-    const float* filterData = m_Weight->template GetConstTensor<float>();
-    const float* biasData   = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
-    const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
-
-    ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>(
-        m_Data, inputData, 0.0f, 0, filterData, 0.0f, 0, biasData, 0.0f, 0, filterInfo);
-}
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp
deleted file mode 100644 (file)
index 5ff743d..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <backendsCommon/Workload.hpp>
-#include <backendsCommon/WorkloadData.hpp>
-
-namespace armnn
-{
-
-class RefConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
-{
-public:
-    explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& info);
-    virtual void Execute() const override;
-
-private:
-    std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
-    std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
-
-};
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
deleted file mode 100644 (file)
index af2c7ad..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefConvolution2dUint8Workload.hpp"
-
-#include "ConvImpl.hpp"
-#include "RefWorkloadUtils.hpp"
-
-#include "Profiling.hpp"
-
-namespace armnn
-{
-RefConvolution2dUint8Workload::RefConvolution2dUint8Workload(
-    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
-        : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info),
-          m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight))),
-          m_Bias(descriptor.m_Parameters.m_BiasEnabled
-                 ? std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias)) : nullptr) {}
-
-void RefConvolution2dUint8Workload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute");
-
-    const uint8_t* inputData = GetInputTensorDataU8(0, m_Data);
-    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
-    const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
-    const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
-    const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
-    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
-    const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
-
-    ConvImpl<armnn::Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
-        m_Data,
-        inputData, inputInfo.GetQuantizationScale(),  inputInfo.GetQuantizationOffset(),
-        weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
-        biasData,
-        outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
-}
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp
deleted file mode 100644 (file)
index a58f23a..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <backendsCommon/Workload.hpp>
-#include <backendsCommon/WorkloadData.hpp>
-
-namespace armnn
-{
-
-class RefConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
-{
-public:
-    explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
-                                             const WorkloadInfo& info);
-
-    virtual void Execute() const override;
-
-private:
-    std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
-    std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
-
-};
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
new file mode 100644 (file)
index 0000000..0824d5c
--- /dev/null
@@ -0,0 +1,53 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvolution2dWorkload.hpp"
+
+#include "ConvImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+RefConvolution2dWorkload::RefConvolution2dWorkload(
+        const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+        : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
+{
+    m_Weight = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight));
+    const TensorInfo& rFilterInfo = GetTensorInfo(m_Weight.get());
+    m_FilterShape = rFilterInfo.GetShape();
+    m_FilterDecoder = MakeDecoder<float>(rFilterInfo, m_Weight.get()->Map(true));
+
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        m_Bias = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
+        const TensorInfo& biasInfo = GetTensorInfo(m_Bias.get());
+        m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias.get()->Map(true));
+    }
+}
+
+void RefConvolution2dWorkload::PostAllocationConfigure()
+{
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    m_InputShape = inputInfo.GetShape();
+    m_InputDecoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+    m_OutputShape = outputInfo.GetShape();
+    m_OutputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+}
+
+void RefConvolution2dWorkload::Execute() const {
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dWorkload_Execute");
+
+    Convolve(m_InputShape, *m_InputDecoder, m_OutputShape, *m_OutputEncoder, m_FilterShape,
+             *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(),
+             m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft,
+             m_Data.m_Parameters.m_StrideX, m_Data.m_Parameters.m_StrideY,
+             m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
+}
+
+} //namespace armnn
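
Putting the pieces together, the lifecycle the reference backend now follows looks
roughly like this (a sketch only; descriptor and info stand for a populated
Convolution2dQueueDescriptor and WorkloadInfo, and memory management is elided):

    RefWorkloadFactory factory;
    auto workload = factory.CreateConvolution2d(descriptor, info);  // constructs RefConvolution2dWorkload,
                                                                    // decoding weights/bias immediately
    // ... allocate and bind input/output tensor memory ...

    workload->PostAllocationConfigure();   // map inputs/outputs, build their Decoder/Encoder
    workload->Execute();                   // runs Convolve over the prepared iterators
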
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
new file mode 100644 (file)
index 0000000..b6bdf23
--- /dev/null
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+namespace armnn
+{
+
+class RefConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
+{
+public:
+    explicit RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+                                      const WorkloadInfo& info);
+
+    void PostAllocationConfigure() override;
+
+    virtual void Execute() const override;
+
+private:
+    std::unique_ptr<ScopedCpuTensorHandle> m_Weight;
+    std::unique_ptr<ScopedCpuTensorHandle> m_Bias;
+
+    std::unique_ptr<Decoder<float>> m_InputDecoder;
+    std::unique_ptr<Encoder<float>> m_OutputEncoder;
+    std::unique_ptr<Decoder<float>> m_FilterDecoder;
+    std::unique_ptr<Decoder<float>> m_BiasDecoder;
+
+    TensorShape m_InputShape;
+    TensorShape m_OutputShape;
+    TensorShape m_FilterShape;
+};
+
+} //namespace armnn
+
index 314e65b..5a65f60 100644 (file)
@@ -9,7 +9,7 @@
 #include "RefElementwiseWorkload.hpp"
 #include "ConvImpl.hpp"
 #include "RefConstantWorkload.hpp"
-#include "RefConvolution2dUint8Workload.hpp"
+#include "RefConvolution2dWorkload.hpp"
 #include "RefSplitterUint8Workload.hpp"
 #include "RefResizeBilinearUint8Workload.hpp"
 #include "RefL2NormalizationFloat32Workload.hpp"
@@ -46,7 +46,6 @@
 #include "RefSpaceToBatchNdWorkload.hpp"
 #include "RefSplitterFloat32Workload.hpp"
 #include "RefStridedSliceWorkload.hpp"
-#include "RefConvolution2dFloat32Workload.hpp"
 #include "Pooling2d.hpp"
 #include "RefFakeQuantizationFloat32Workload.hpp"
 #include "RefPermuteWorkload.hpp"