numWeightInputChannels % numWeightChannelMultiplier));
}
- ValidateTensorDataTypesMatch(inputTensorInfo, weightTensorInfo, descriptorName, "input", "weight");
+ ValidateWeightDataType(inputTensorInfo, weightTensorInfo, descriptorName);
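+ // Keep the bias TensorInfo in an Optional so it can also be passed to the per-axis quantization check below.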
+ Optional<TensorInfo> optionalBiasTensorInfo;
if (m_Parameters.m_BiasEnabled)
{
ValidatePointer(m_Bias, descriptorName, "bias");
- const TensorInfo& biasTensorInfo = m_Bias->GetTensorInfo();
- ValidateTensorNumDimensions(biasTensorInfo, descriptorName, 1, "bias");
+ optionalBiasTensorInfo = MakeOptional<TensorInfo>(m_Bias->GetTensorInfo());
+ const TensorInfo& biasTensorInfo = optionalBiasTensorInfo.value();
ValidateBiasTensorQuantization(biasTensorInfo, inputTensorInfo, weightTensorInfo, descriptorName);
ValidateTensorDataType(biasTensorInfo, GetBiasDataType(inputTensorInfo.GetDataType()), descriptorName, "bias");
}
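+ // Check the per-axis quantization info of the weight (and optional bias) tensors against the input and output tensors.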
+ ValidatePerAxisQuantization(inputTensorInfo,
+ outputTensorInfo,
+ weightTensorInfo,
+ optionalBiasTensorInfo,
+ descriptorName);
std::vector<DataType> supportedTypes =
{
workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::DataLayout layout)
+{
+ using namespace armnn;
+
+ const DataType inputType = DataType::QuantisedAsymm8;
+ const DataType kernelType = DataType::QuantizedSymm8PerAxis;
+ const DataType biasType = DataType::Signed32;
+
+ TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
+ TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
+
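+ // The kernel is quantized per-axis: one scale for each of the four output channels (M * I).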
+ const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
+ const unsigned int quantDimension = 0;
+ TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
+
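+ // Each bias scale equals inputScale * kernelScale for the corresponding output channel.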
+ const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
+ constexpr unsigned int biasQuantDimension = 0;
+ TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
+
+ std::vector<uint8_t> inputData =
+ {
+ 129, 130,
+ 129, 130,
+ 129, 130,
+ 129, 130,
+ 129, 130,
+ 129, 130,
+ 129, 130,
+ 129, 130,
+ 129, 130
+ };
+
+ std::vector<int8_t> kernelData =
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1
+ };
+
+ std::vector<int32_t> biasData =
+ {
+ 4, 4, 4, 4
+ };
+
+ std::vector<uint8_t> expectedOutputData =
+ {
+ 132, 130, 134, 131,
+ 132, 130, 134, 131,
+ 132, 130, 134, 131,
+ 132, 130, 134, 131
+ };
+
+ if (layout == DataLayout::NCHW)
+ {
+ PermuteTensorNhwcToNchw(inputInfo, inputData);
+ PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+ }
+
+ DepthwiseConvolution2dDescriptor descriptor;
+ descriptor.m_StrideX = 1;
+ descriptor.m_StrideY = 1;
+ descriptor.m_PadLeft = 0;
+ descriptor.m_PadRight = 0;
+ descriptor.m_PadTop = 0;
+ descriptor.m_PadBottom = 0;
+ descriptor.m_DilationX = 1;
+ descriptor.m_DilationY = 1;
+ descriptor.m_BiasEnabled = true;
+ descriptor.m_DataLayout = layout;
+
+ std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
+ std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
+
+ WorkloadInfo workloadInfo;
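+ // Weights and biases are passed to the workload as constant (scoped CPU) tensor handles.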
+ ScopedCpuTensorHandle weightTensor(kernelInfo);
+ ScopedCpuTensorHandle biasTensor(biasInfo);
+
+ AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
+ AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
+
+ DepthwiseConvolution2dQueueDescriptor queueDescriptor;
+ queueDescriptor.m_Parameters = descriptor;
+ queueDescriptor.m_Weight = &weightTensor;
+ queueDescriptor.m_Bias = &biasTensor;
+
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+ AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+ std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), inputData.data());
+
+ ExecuteWorkload(*workload, memoryManager);
+
+ LayerTestResult<uint8_t, 4> ret(outputInfo);
+
+ CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
+ ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
+
+ return ret;
+}
+
LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
bool biasEnabled,
const armnn::DataLayout layout);
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::DataLayout layout);
+
LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
"Reference DepthwiseConvolution2d: output is not a supported type.");
- supported &= CheckSupportRule(TypeAnyOf(weights, supportedTypes), reasonIfUnsupported,
- "Reference DepthwiseConvolution2d: weights is not a supported type.");
-
supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
"Reference DepthwiseConvolution2d: input and output types mismatched.");
- supported &= CheckSupportRule(TypesAreEqual(input, weights), reasonIfUnsupported,
- "Reference DepthwiseConvolution2d: input and weights types mismatched.");
+ const DataType inputType = input.GetDataType();
+ if (inputType == DataType::QuantisedAsymm8)
+ {
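+ // For quantized inputs, accept per-axis quantized (QSymm8) weights in addition to QAsymm8 weights.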
+ std::array<DataType, 2> supportedWeightTypes =
+ {
+ DataType::QuantisedAsymm8,
+ DataType::QuantizedSymm8PerAxis
+ };
+
+ supported &= CheckSupportRule(TypeAnyOf(weights, supportedWeightTypes), reasonIfUnsupported,
+ "Reference convolution2d: weights type not supported for quantized input.");
+ }
+ else
+ {
+ supported &= CheckSupportRule(TypeAnyOf(weights, supportedTypes), reasonIfUnsupported,
+ "Reference DepthwiseConvolution2d: weights is not a supported type.");
+
+ supported &= CheckSupportRule(TypesAreEqual(input, weights), reasonIfUnsupported,
+ "Reference DepthwiseConvolution2d: input and weights types mismatched.");
+ }
if (biases.has_value())
{
DataLayout::NCHW)
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dQSymm16, DepthwiseConvolution2dInt16Test, true, DataLayout::NCHW)
-// NHWC Depthwise Convolution
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dNhwc, DepthwiseConvolution2dTest, true, DataLayout::NHWC)
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dUint8Nhwc, DepthwiseConvolution2dUint8Test, true, DataLayout::NHWC)
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul64, DepthwiseConvolution2dDepthMul64Test);
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dPerAxisQuantTestNchw, DepthwiseConvolution2dPerAxisQuantTest,
+ DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dPerAxisQuantTestNhwc, DepthwiseConvolution2dPerAxisQuantTest,
+ DataLayout::NHWC);
+
// Pooling
//MaxPooling
ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false)
int32_t m_RightShift;
};
-/// An implementation shared by normal and depthwise convolution.
-template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
-static void ConvImpl(ConvData data,
- const InputType* inputData,
- float inputScale,
- int32_t inputOffset,
- const InputType* filterData,
- float filterScale,
- int32_t filterOffset,
- const BiasType* biasData,
- float outputScale,
- int32_t outputOffset,
- const TensorInfo& filterInfo,
- bool depthwise = false)
-{
- if (data.m_Parameters.m_BiasEnabled && !biasData)
- {
- throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
- }
-
- const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
- const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
-
- TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
- GetOutputTensorData<InputType>(0, data),
- data.m_Parameters.m_DataLayout);
-
- const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);
-
- const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
- const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
- const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
-
- unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
- unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
- unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
-
- unsigned int batchSize = outputInfo.GetShape()[0];
- unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
- unsigned int outputWidth = outputInfo.GetShape()[widthIndex];
- unsigned int inputHeight = inputInfo.GetShape()[heightIndex];
- unsigned int inputWidth = inputInfo.GetShape()[widthIndex];
-
- unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
- unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
-
- unsigned int paddingTop = data.m_Parameters.m_PadTop;
- unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
- unsigned int xStride = data.m_Parameters.m_StrideX;
- unsigned int yStride = data.m_Parameters.m_StrideY;
- unsigned int xDilation = data.m_Parameters.m_DilationX;
- unsigned int yDilation = data.m_Parameters.m_DilationY;
-
- // The world's least efficient convolution.
- for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
- {
- for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
- {
- for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
- {
- for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
- {
- // This loop goes over each output element.
- AccumulatorType sum = AccumulatorType();
-
- // For depthwise, each output channel corresponds to exactly one input channel.
- // For normal, must loop over each input channel.
- for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
- {
- unsigned int depthwiseMultiplierIdx = 0;
- if (depthwise)
- {
- cInput = cOutput / depthMultiplier;
- depthwiseMultiplierIdx = cOutput % depthMultiplier;
- }
-
- for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
- {
- for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
- {
- // This loop goes over each input element for each output element.
-
- unsigned int filterIndex = 0;
-
- // Since dimensionality of kernel depends on depthwiseness, so does index.
- if (depthwise)
- {
- filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
- cInput * filterWidth * filterHeight +
- yFilter * filterWidth +
- xFilter;
- }
- else
- {
- if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
- {
- filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
- yFilter * filterWidth * inputChannels +
- xFilter * inputChannels +
- cInput;
- }
- else
- {
- filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
- cInput * filterWidth * filterHeight +
- yFilter * filterWidth +
- xFilter;
- }
- }
-
- AccumulatorType filterValue = filterData[filterIndex] -
- boost::numeric_cast<AccumulatorType>(filterOffset);
-
- unsigned int yInput = yOutput * yStride + yFilter * yDilation;
- unsigned int xInput = xOutput * xStride + xFilter * xDilation;
-
- AccumulatorType inputValue;
-
- // Check if we're in the padding.
- if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
- xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
- {
- inputValue = AccumulatorType();
- }
- else
- {
- unsigned int inputIndex;
-
- if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
- {
- inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
- (yInput - paddingTop) * inputWidth * inputChannels +
- (xInput - paddingLeft) * inputChannels +
- cInput;
-
- }
- else
- {
- inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
- inputWidth * inputHeight * cInput +
- inputWidth * (yInput - paddingTop) +
- xInput - paddingLeft;
- }
-
- inputValue = inputData[inputIndex] -
- boost::numeric_cast<AccumulatorType>(inputOffset);
-
- }
- sum += filterValue * inputValue;
- }
- }
- }
-
- if (data.m_Parameters.m_BiasEnabled)
- {
- sum += biasData[cOutput];
- }
-
- if (outputScale != 0.0f)
- {
- float multiplier = (inputScale * filterScale) / outputScale;
- // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
- // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
- // sum = std::round(multiplier * sum + outputOffset);
- sum = boost::numeric_cast<AccumulatorType>(
- QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
- + boost::numeric_cast<AccumulatorType>(outputOffset);
- sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
- }
-
- output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
- }
- }
- }
- }
-}
-
void Convolve(const TensorShape& rInputShape,
Decoder<float>& rInputDecoder,
const TensorShape& rOutputShape,