IVGCVSW-3837 Add support for per-axis quantization to reference Convolution2d workload

author    Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>  Tue, 5 Nov 2019 18:00:21 +0000 (18:00 +0000)
committer Francis Murtagh <francis.murtagh@arm.com>      Wed, 6 Nov 2019 12:10:02 +0000 (12:10 +0000)

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: I0ac08ba4864d48e6f64c4ac645dad8ea850be112

12 files changed:
include/armnn/Tensor.hpp
include/armnn/TypesUtils.hpp
src/armnn/Tensor.cpp
src/armnnUtils/TensorUtils.cpp
src/backends/backendsCommon/WorkloadData.cpp
src/backends/backendsCommon/test/WorkloadDataValidation.cpp
src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp
src/backends/reference/RefLayerSupport.cpp
src/backends/reference/test/RefLayerTests.cpp
src/backends/reference/workloads/BaseIterator.hpp
src/backends/reference/workloads/ConvImpl.cpp

diff --git a/include/armnn/Tensor.hpp b/include/armnn/Tensor.hpp
index 57a2438..d41cbb4 100644
@@ -97,6 +97,8 @@ public:
 
     bool HasMultipleQuantizationScales() const       { return m_Quantization.m_Scales.size() > 1; }
 
+    bool HasPerAxisQuantization() const;
+
     std::vector<float> GetQuantizationScales() const;
     void SetQuantizationScales(const std::vector<float>& scales);
 
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index e2294af..cdcbd3c 100644
@@ -102,13 +102,14 @@ constexpr unsigned int GetDataTypeSize(DataType dataType)
 {
     switch (dataType)
     {
-        case DataType::Float16:          return 2U;
+        case DataType::Float16:               return 2U;
         case DataType::Float32:
-        case DataType::Signed32:         return 4U;
-        case DataType::QuantisedAsymm8:  return 1U;
-        case DataType::QuantisedSymm16:  return 2U;
-        case DataType::Boolean:          return 1U;
-        default:                         return 0U;
+        case DataType::Signed32:              return 4U;
+        case DataType::QuantisedAsymm8:       return 1U;
+        case DataType::QuantizedSymm8PerAxis: return 1U;
+        case DataType::QuantisedSymm16:       return 2U;
+        case DataType::Boolean:               return 1U;
+        default:                              return 0U;
     }
 }
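Note that the new QuantizedSymm8PerAxis entry is still one byte per element, the same as QuantisedAsymm8: the per-axis scales are carried by the TensorInfo, not interleaved with the tensor data.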
 
diff --git a/src/armnn/Tensor.cpp b/src/armnn/Tensor.cpp
index f4b8b50..dad9722 100644
@@ -230,6 +230,11 @@ bool TensorInfo::IsTypeSpaceMatch(const TensorInfo& other) const
     return match;
 }
 
+bool TensorInfo::HasPerAxisQuantization() const
+{
+    return HasMultipleQuantizationScales() || m_Quantization.m_QuantizationDim.has_value();
+}
+
 std::vector<float> TensorInfo::GetQuantizationScales() const
 {
     return m_Quantization.m_Scales;
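
For orientation, a minimal sketch of the two quantization flavours as TensorInfo constructions (same constructor signatures the tests below use; variable names here are illustrative only):

    using namespace armnn;

    // Per-tensor quantization: a single scale and offset.
    TensorInfo perTensorInfo({ 1, 3, 1, 2 }, DataType::QuantisedAsymm8, 0.5f, 128);

    // Per-axis quantization: one scale per slice along quantization dimension 0;
    // the offset is implicitly 0 (symmetric).
    const std::vector<float> scales{ 0.5f, 0.75f, 1.0f };
    TensorInfo perAxisInfo({ 3, 1, 1, 2 }, DataType::QuantizedSymm8PerAxis, scales, 0);

    // perTensorInfo.HasPerAxisQuantization() -> false (one scale, no quantization dim)
    // perAxisInfo.HasPerAxisQuantization()   -> true  (multiple scales and a quantization dim)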
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index 630490f..6012774 100644
@@ -142,7 +142,7 @@ unsigned int GetNumElementsAfter(const armnn::TensorShape& shape, unsigned int a
 {
     unsigned int numDim = shape.GetNumDimensions();
     BOOST_ASSERT(0 <= axis);
-    BOOST_ASSERT(axis < numDim - 1);
+    BOOST_ASSERT(axis <= numDim - 1);
     unsigned int count = 1;
     for (unsigned int i = axis; i < numDim; i++)
     {
@@ -155,7 +155,7 @@ std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::Tensor
 {
     const std::vector<float>& scales = info.GetQuantizationScales();
     armnn::Optional<unsigned int> quantizationDim = info.GetQuantizationDim();
-    if (scales.size() < 1 || !quantizationDim.has_value())
+    if (!info.HasPerAxisQuantization())
     {
         throw armnn::InvalidArgumentException(
             std::string("Per-axis quantization params not set for tensor of type ") +
@@ -166,5 +166,4 @@ std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::Tensor
     return { axisFactor, scales };
 }
 
-
 } // namespace armnnUtils
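
For convolution weights laid out [cOutput, cInput, H, W] with the quantization dimension fixed at 0 (enforced by the validation added below), this means one scale per output channel. A minimal sketch of the resulting dequantization rule (illustrative helper, not the ArmNN API):

    #include <cstdint>
    #include <vector>

    // Symmetric per-axis dequantization of a conv weight: the zero point is 0
    // by definition, so dequantization is a per-output-channel multiply.
    float DequantizePerAxis(int8_t value, unsigned int outputChannel,
                            const std::vector<float>& scales)
    {
        return static_cast<float>(value) * scales[outputChannel];
    }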
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index e1a369a..201cc7d 100644
@@ -338,6 +338,102 @@ void ValidateTensorNumElementsMatch(const TensorInfo& first,
     }
 }
 
+void ValidateWeightDataType(const TensorInfo& inputInfo,
+                            const TensorInfo& weightInfo,
+                            const std::string& descName)
+{
+    const DataType inputType = inputInfo.GetDataType();
+    if (inputType == DataType::QuantisedAsymm8)
+    {
+        const std::vector<DataType> validTypes =
+        {
+            DataType::QuantisedAsymm8,
+            DataType::QuantizedSymm8PerAxis
+        };
+
+        ValidateDataTypes(weightInfo, validTypes, descName);
+    }
+    else
+    {
+        ValidateTensorDataTypesMatch(inputInfo, weightInfo, descName, "input", "weight");
+    }
+}
+
+void ValidatePerAxisQuantizationDimension(const TensorInfo& tensorInfo,
+                                          const std::string& descName,
+                                          const std::string& tensorName)
+{
+    const Optional<unsigned int>& quantizationDim = tensorInfo.GetQuantizationDim();
+    if (!quantizationDim.has_value())
+    {
+        throw InvalidArgumentException(boost::str(
+            boost::format("%1%: Quantization dimension for per-axis quantization not set on tensor %2%.")
+            % descName % tensorName));
+    }
+
+    if (quantizationDim.value() != 0)
+    {
+        throw InvalidArgumentException(boost::str(
+            boost::format("%1%: Quantization dimension for per-axis quantization expected to be 0 on tensor %2%, "
+            "but got: %3%") % descName % tensorName % quantizationDim.value()));
+    }
+}
+
+void ValidatePerAxisQuantizationOffset(const TensorInfo& tensorInfo,
+                                       const std::string& descName,
+                                       const std::string& tensorName)
+{
+    int32_t quantizationOffset = tensorInfo.GetQuantizationOffset();
+    if (quantizationOffset != 0)
+    {
+        throw InvalidArgumentException(boost::str(
+            boost::format("%1%: Quantization offset for per-axis quantization expected to be 0 on tensor %2%, "
+            "but got: %3%") % descName % tensorName % quantizationOffset));
+    }
+}
+
+void ValidatePerAxisQuantization(const TensorInfo& inputInfo,
+                                 const TensorInfo& outputInfo,
+                                 const TensorInfo& weightInfo,
+                                 const Optional<TensorInfo>& optionalBiasInfo,
+                                 const std::string& descName)
+{
+    if (weightInfo.HasPerAxisQuantization())
+    {
+        const DataType inputDataType  = inputInfo.GetDataType();
+        const DataType outputDataType = outputInfo.GetDataType();
+
+        const bool canHavePerAxisQuantization =
+            inputDataType == DataType::QuantisedAsymm8 && inputDataType == outputDataType;
+
+        if (!canHavePerAxisQuantization)
+        {
+            throw InvalidArgumentException(boost::str(
+                boost::format("%1%: Per-axis quantization parameters set on tensor %2%, "
+                "but data type does not support per-axis quantization.") % descName % "weight"));
+        }
+
+        ValidateTensorDataType(weightInfo, DataType::QuantizedSymm8PerAxis, descName, "weight");
+        ValidatePerAxisQuantizationDimension(weightInfo, descName, "weight");
+        ValidatePerAxisQuantizationOffset(weightInfo, descName, "weight");
+
+        if (optionalBiasInfo.has_value())
+        {
+            const TensorInfo& biasInfo = optionalBiasInfo.value();
+            if (!biasInfo.HasPerAxisQuantization())
+            {
+                throw InvalidArgumentException(boost::str(
+                    boost::format("%1%: Per-axis quantization parameters not set on bias tensor, despite being set on "
+                    "weight tensor.") % descName));
+            }
+
+            ValidateTensorDataType(biasInfo, DataType::Signed32, descName, "bias");
+            ValidatePerAxisQuantizationDimension(biasInfo, descName, "bias");
+            ValidatePerAxisQuantizationOffset(biasInfo, descName, "bias");
+        }
+    }
+}
+
 } // anonymous namespace
 
 void QueueDescriptor::ValidateInputsOutputs(const std::string& descName,
@@ -1040,19 +1136,26 @@ void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) co
     const TensorInfo& weightTensorInfo = m_Weight->GetTensorInfo();
     ValidateTensorNumDimensions(weightTensorInfo, descriptorName, 4, "weight");
 
-    ValidateTensorDataTypesMatch(inputTensorInfo, weightTensorInfo, descriptorName, "input", "weight");
+    ValidateWeightDataType(inputTensorInfo, weightTensorInfo, descriptorName);
 
+    Optional<TensorInfo> optionalBiasTensorInfo;
     if (m_Parameters.m_BiasEnabled)
     {
         ValidatePointer(m_Bias, descriptorName, "bias");
 
-        const TensorInfo& biasTensorInfo = m_Bias->GetTensorInfo();
-        ValidateTensorNumDimensions(biasTensorInfo, descriptorName, 1, "bias");
+        optionalBiasTensorInfo = MakeOptional<TensorInfo>(m_Bias->GetTensorInfo());
+        const TensorInfo& biasTensorInfo = optionalBiasTensorInfo.value();
 
         ValidateTensorDataType(biasTensorInfo, GetBiasDataType(inputTensorInfo.GetDataType()), descriptorName, "bias");
         ValidateBiasTensorQuantization(biasTensorInfo, inputTensorInfo, weightTensorInfo, descriptorName);
     }
 
+    ValidatePerAxisQuantization(inputTensorInfo,
+                                outputTensorInfo,
+                                weightTensorInfo,
+                                optionalBiasTensorInfo,
+                                descriptorName);
+
     std::vector<DataType> supportedTypes =
     {
         DataType::Float32,
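
The bias check relies on the usual quantized-convolution convention, which the test data further down follows: each per-axis bias scale is the input scale multiplied by the matching per-axis weight scale, with a zero offset (0.5 * {0.5, 0.75, 1.0} gives {0.25, 0.375, 0.5}). A sketch of that relationship (illustrative helper, not part of the patch):

    #include <vector>

    // Derive the expected per-axis bias scales from the input scale and the
    // per-axis weight scales (bias data type is Signed32, offset 0).
    std::vector<float> MakeBiasScales(float inputScale, const std::vector<float>& weightScales)
    {
        std::vector<float> biasScales;
        biasScales.reserve(weightScales.size());
        for (float weightScale : weightScales)
        {
            biasScales.push_back(inputScale * weightScale); // e.g. 0.5f * 0.75f = 0.375f
        }
        return biasScales;
    }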
diff --git a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
index 9773914..70d00b3 100644
@@ -605,15 +605,16 @@ BOOST_AUTO_TEST_CASE(BiasPerAxisQuantization_Validate)
     const TensorShape weightShape{ cOutput, cInput,  hInput,  wInput  };
     const TensorShape biasShape  { cOutput                            };
 
-    constexpr DataType dataType = DataType::QuantisedAsymm8;
-    constexpr DataType biasType = DataType::Signed32;
+    constexpr DataType inputType  = DataType::QuantisedAsymm8;
+    constexpr DataType weightType = DataType::QuantizedSymm8PerAxis;
+    constexpr DataType biasType   = DataType::Signed32;
 
     constexpr float perTensorScale = 1.5f;
-    const TensorInfo inputInfo (inputShape,  dataType, perTensorScale);
-    const TensorInfo outputInfo(outputShape, dataType, perTensorScale);
+    const TensorInfo inputInfo (inputShape,  inputType, perTensorScale);
+    const TensorInfo outputInfo(outputShape, inputType, perTensorScale);
 
     const std::vector<float> weightPerAxisScales = { 2.50f, 3.50f };
-    const TensorInfo weightInfo(weightShape, dataType, weightPerAxisScales, 0);
+    const TensorInfo weightInfo(weightShape, weightType, weightPerAxisScales, 0);
 
     Convolution2dQueueDescriptor queueDescriptor;
     queueDescriptor.m_Parameters.m_BiasEnabled = true;
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
index 198904e..5fac09f 100644
@@ -14,6 +14,7 @@
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <backendsCommon/test/DataLayoutUtils.hpp>
 #include <backendsCommon/test/TensorCopyUtils.hpp>
 #include <backendsCommon/test/WorkloadTestUtils.hpp>
 
@@ -3035,6 +3036,98 @@ LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
             workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
 }
 
+LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    using namespace armnn;
+
+    const DataType inputType  = DataType::QuantisedAsymm8;
+    const DataType kernelType = DataType::QuantizedSymm8PerAxis;
+    const DataType biasType   = DataType::Signed32;
+
+    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
+    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
+
+    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
+    constexpr unsigned int quantDimension = 0;
+
+    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
+
+    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
+    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
+
+    std::vector<uint8_t> inputData =
+    {
+        138, 108, 138, 108, 138, 108
+    };
+
+    std::vector<int8_t> kernelData =
+    {
+        1, 2, 1, 2, 1, 2
+    };
+
+    std::vector<int32_t> biasData =
+    {
+        4, 4, 4
+    };
+
+    std::vector<uint8_t> expectedOutputData =
+    {
+        121, 118, 115, 121, 118, 115, 121, 118, 115
+    };
+
+    if (layout == DataLayout::NCHW)
+    {
+        PermuteTensorNhwcToNchw(inputInfo, inputData);
+        PermuteTensorNhwcToNchw(kernelInfo, kernelData);
+        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+    }
+
+    Convolution2dDescriptor descriptor;
+    descriptor.m_StrideX     = 1;
+    descriptor.m_StrideY     = 1;
+    descriptor.m_PadLeft     = 0;
+    descriptor.m_PadRight    = 0;
+    descriptor.m_PadTop      = 0;
+    descriptor.m_PadBottom   = 0;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = layout;
+
+    std::unique_ptr<ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
+
+    WorkloadInfo workloadInfo;
+    ScopedCpuTensorHandle weightTensor(kernelInfo);
+    ScopedCpuTensorHandle biasTensor(biasInfo);
+
+    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
+    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
+
+    Convolution2dQueueDescriptor queueDescriptor;
+    queueDescriptor.m_Parameters = descriptor;
+    queueDescriptor.m_Weight     = &weightTensor;
+    queueDescriptor.m_Bias       = &biasTensor;
+
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    LayerTestResult<uint8_t, 4> ret(outputInfo);
+    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
+    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
+
+    return ret;
+}
+
 LayerTestResult<float,4> CompareConvolution2dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp
index f5ff586..3aac975 100644
@@ -111,6 +111,11 @@ LayerTestResult<float, 4> CompareConvolution2dTest(
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     armnn::IWorkloadFactory& refWorkloadFactory);
 
+LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
 //
 // DepthwiseConvolution2d
 //
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 716e8d9..4252fec 100644
@@ -433,11 +433,12 @@ bool RefLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
     bool supported = true;
 
     // Define supported types.
-    std::array<DataType,4> supportedTypes = {
-            DataType::Float32,
-            DataType::Float16,
-            DataType::QuantisedAsymm8,
-            DataType::QuantisedSymm16
+    std::array<DataType,4> supportedTypes =
+    {
+        DataType::Float32,
+        DataType::Float16,
+        DataType::QuantisedAsymm8,
+        DataType::QuantisedSymm16
     };
 
     supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
@@ -446,22 +447,39 @@ bool RefLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
     supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
                                   "Reference convolution2d: output is not a supported type.");
 
-    supported &= CheckSupportRule(TypeAnyOf(weights, supportedTypes), reasonIfUnsupported,
-                                  "Reference convolution2d: weights is not a supported type.");
-
     supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
                                   "Reference convolution2d: input and output types mismatched.");
 
-    supported &= CheckSupportRule(TypesAreEqual(input, weights), reasonIfUnsupported,
-                                  "Reference convolution2d: input and weights types mismatched.");
+    const DataType inputType = input.GetDataType();
+    if (inputType == DataType::QuantisedAsymm8)
+    {
+        std::array<DataType, 2> supportedWeightTypes =
+        {
+            DataType::QuantisedAsymm8,
+            DataType::QuantizedSymm8PerAxis
+        };
+
+        supported &= CheckSupportRule(TypeAnyOf(weights, supportedWeightTypes), reasonIfUnsupported,
+                                      "Reference convolution2d: weights type not supported for quantized input.");
+    }
+    else
+    {
+        supported &= CheckSupportRule(TypeAnyOf(weights, supportedTypes), reasonIfUnsupported,
+                                      "Reference convolution2d: weights is not a supported type.");
+
+        supported &= CheckSupportRule(TypesAreEqual(input, weights), reasonIfUnsupported,
+                                      "Reference convolution2d: input and weights types mismatched.");
+    }
 
     if (biases.has_value())
     {
-        std::array<DataType,3> biasesSupportedTypes = {
-                DataType::Float32,
-                DataType::Float16,
-                DataType::Signed32
+        std::array<DataType,3> biasesSupportedTypes =
+        {
+            DataType::Float32,
+            DataType::Float16,
+            DataType::Signed32
         };
+
         supported &= CheckSupportRule(TypeAnyOf(biases.value(), biasesSupportedTypes), reasonIfUnsupported,
                                       "Reference convolution2d: biases is not a supported type.");
     }
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 2c38ed5..c407828 100644
@@ -145,6 +145,8 @@ ARMNN_AUTO_TEST_CASE(Convolution2d2x2Dilation2x2Padding2x2Stride3x3NhwcInt16,
                      false,
                      DataLayout::NHWC)
 
+ARMNN_AUTO_TEST_CASE(Convolution2dPerAxisQuantTestNchw, Convolution2dPerAxisQuantTest, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(Convolution2dPerAxisQuantTestNhwc, Convolution2dPerAxisQuantTest, DataLayout::NHWC);
 
 // Depthwise Convolution
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true, DataLayout::NCHW)
diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp
index 5047531..95a31fb 100644
@@ -11,6 +11,7 @@
 #include <ResolveType.hpp>
 
 #include <boost/assert.hpp>
+#include <boost/core/ignore_unused.hpp>
 
 namespace armnn
 {
@@ -22,6 +23,8 @@ public:
 
     virtual ~BaseIterator() {}
 
+    virtual BaseIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) = 0;
+
     virtual BaseIterator& operator++() = 0;
 
     virtual BaseIterator& operator+=(const unsigned int increment) = 0;
@@ -101,6 +104,14 @@ public:
         return *this;
     }
 
+    TypedIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) override
+    {
+        boost::ignore_unused(axisIndex);
+        BOOST_ASSERT(m_Iterator);
+        m_Iterator = m_Start + index;
+        return *this;
+    }
+
 protected:
     T* m_Iterator;
     T* m_Start;
@@ -350,7 +361,7 @@ public:
     {}
 
     // This should be called to set index for per-axis Encoder/Decoder
-    PerAxisIterator& SetIndex(unsigned int index, unsigned int axisIndex)
+    PerAxisIterator& SetIndex(unsigned int index, unsigned int axisIndex) override
     {
          BOOST_ASSERT(m_Iterator);
          m_Iterator = m_Start + index;
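
The per-axis override is truncated above; presumably it also records axisIndex so that Get() can select the matching scale. A minimal sketch of the intended contract (hypothetical type, not the ArmNN class):

    #include <cstdint>
    #include <vector>

    struct PerAxisDecoderSketch
    {
        const int8_t*      m_Start    = nullptr;
        const int8_t*      m_Iterator = nullptr;
        std::vector<float> m_Scales;
        unsigned int       m_AxisIndex = 0;

        PerAxisDecoderSketch& SetIndex(unsigned int index, unsigned int axisIndex)
        {
            m_Iterator  = m_Start + index;  // position on the flat element
            m_AxisIndex = axisIndex;        // remember which axis slice we are in
            return *this;
        }

        float Get() const
        {
            // Symmetric per-axis dequantization: scale chosen by axis slice, offset 0.
            return static_cast<float>(*m_Iterator) * m_Scales[m_AxisIndex];
        }
    };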
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp
index 92e3b2d..0c13e3b 100644
@@ -165,7 +165,7 @@ void Convolve(const TensorShape& rInputShape,
                                     }
                                 }
 
-                                rFilterDecoder[filterIndex];
+                                rFilterDecoder.SetIndex(filterIndex, cOutput);
                                 float filterValue = rFilterDecoder.Get();
 
                                 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
@@ -211,7 +211,7 @@ void Convolve(const TensorShape& rInputShape,
 
                     if (biasEnabled)
                     {
-                        (*pBiasDecoder)[cOutput];
+                        (*pBiasDecoder).SetIndex(cOutput, cOutput);
                         sum += pBiasDecoder->Get();
                     }
 
@@ -225,4 +225,4 @@ void Convolve(const TensorShape& rInputShape,
     }
 }
 
-} //namespace armnn
+} // namespace armnn
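
The switch from operator[] to SetIndex is the crux of the workload change: a per-axis decoder needs both the flat element index and the axis index (here the output channel, cOutput) to pick the right scale, whereas the plain TypedIterator simply ignores the second argument (boost::ignore_unused above). Per-tensor and per-axis weights can therefore share a single code path in Convolve.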