X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=runtime%2Fonert%2Fbackend%2Fcpu%2Fops%2FDepthwiseConvolutionLayer.cc;h=9e6de17f2de10dff608b320f50a9818dffbda3d8;hb=refs%2Ftags%2Fupstream%2F1.25.0;hp=8a48497d506fac1e066ea0300b0a26f7b0805873;hpb=3a0ad354832744d138b361ffcfd21f33494beb6b;p=platform%2Fcore%2Fml%2Fnnfw.git

diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index 8a48497..9e6de17 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -16,6 +16,7 @@
 
 #include "DepthwiseConvolutionLayer.h"
 
+#include "cker/PortableTensorUtils.h"
 #include
 
 namespace onert
@@ -147,6 +148,55 @@ void DepthwiseConvolutionLayer::convQ8i()
     _external_context->ruy_context());
 }
 
+// Hybrid depthwise convolution path (FLOAT32 input with QUANT_INT8_SYMM kernel).
+// Every batch of the float input is quantized with PortableAsymmetricQuantizeFloats
+// (one scaling factor and one offset per batch) and then fed to the cker hybrid kernel.
+void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
+{
+  if (!_prepared)
+  {
+    prepareQ8iHybridPerChannel();
+    _prepared = true;
+  }
+
+  float output_activation_min = 0, output_activation_max = 0;
+  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+  auto input_shape = getShape(_input);
+  const int batch_size = input_shape.Dims(0);
+  const int input_size = input_shape.FlatSize() / batch_size;
+
+  auto scaling_factors_ptr = _input_scaling_factors.data();
+  auto input_offsets_ptr = _input_offsets.data();
+
+  // Quantize batch by batch; batch b occupies [b * input_size, (b + 1) * input_size)
+  // in both the float input and the quantized buffer.
+  for (int b = 0; b < batch_size; ++b)
+  {
+    const int offset = b * input_size;
+    nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer(_input) + offset, input_size,
+                                                 _input_quantized.data() + offset,
+                                                 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
+  }
+
+  nnfw::cker::DepthwiseConvParams op_params;
+  op_params.padding_values.width = _paddingLeft;
+  op_params.padding_values.height = _paddingTop;
+  op_params.depth_multiplier = _multiplier;
+  op_params.stride_width = _strideWidth;
+  op_params.stride_height = _strideHeight;
+  op_params.dilation_width_factor = _dilationWidth;
+  op_params.dilation_height_factor = _dilationHeight;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
+    op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
+    getShape(_kernel), getBuffer(_kernel), getShape(_bias), getBuffer(_bias),
+    getShape(_output), getBuffer(_output), _kernel->data_scales().data(),
+    _input_offsets.data());
+}
+
 void DepthwiseConvolutionLayer::prepareQ8i()
 {
   GetQuantizedConvolutionMultipliersAndShifts(
@@ -163,6 +213,36 @@ void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
     _per_channel_output_shift);
 }
 
+// Allocates the scratch buffers used to quantize the float input at run time.
+void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
+{
+  // allocate memory for activation quantization.
+  // - quantized values (int8_t type and same shape of original input)
+  // - quantization params (= scale/zeropoint for each batch)
+  auto input_shape = getShape(_input);
+  const int batch_size = input_shape.Dims(0);
+  // The quantized buffer must hold every batch: convQ8iHybridPerChannel() writes batch b at
+  // offset b * (FlatSize / batch_size), so sizing it for a single batch overflows the
+  // buffer as soon as batch_size > 1.
+  _input_quantized.resize(input_shape.FlatSize());
+  // TODO: Optimize the case of batch_size = 1
+  _input_scaling_factors.resize(batch_size);
+  _input_offsets.resize(batch_size);
+}
+
+// Throws std::runtime_error unless the kernel has exactly one scale per entry of dimension 3.
+void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
+{
+  // ensure weight is per-channel quantized: the number of scales must equal
+  // the size of the kernel's dimension 3.
+  const int32_t kernel_channel_cnt = getShape(_kernel).Dims(3);
+  // the scale vector comes from a flatbuffer vector. Its size is within uint32_t range.
+  const size_t kernel_scale_cnt = _kernel->data_scales().size();
+  // promote to int64_t to compare int32_t and uint32_t
+  if (static_cast<int64_t>(kernel_channel_cnt) != static_cast<int64_t>(kernel_scale_cnt))
+    throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
+}
+
 void DepthwiseConvolutionLayer::configure(
   const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
   const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
@@ -186,8 +266,16 @@ void DepthwiseConvolutionLayer::configure(
   _activation = activation;
   _output = output;
   _external_context = external_context;
+  _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
+               _kernel->data_type() == OperandType::QUANT_INT8_SYMM;
 
-  if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+  if (_is_hybrid)
+  {
+    ensureQ8iHybridPerChannel();
+    prepareQ8iHybridPerChannel();
+    _prepared = true;
+  }
+  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
   {
     if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
     {
@@ -209,7 +297,11 @@ void DepthwiseConvolutionLayer::configure(
 
 void DepthwiseConvolutionLayer::run()
 {
-  if (_input->data_type() == OperandType::FLOAT32)
+  if (_is_hybrid)
+  {
+    convQ8iHybridPerChannel();
+  }
+  else if (_input->data_type() == OperandType::FLOAT32)
   {
     convFloat32();
   }