Imported Upstream version 1.25.0
[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / ops / DepthwiseConvolutionLayer.cc
1 /*
2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "DepthwiseConvolutionLayer.h"
18
19 #include "cker/PortableTensorUtils.h"
20 #include <cker/operation/DepthwiseConv.h>
21
22 namespace onert
23 {
24 namespace backend
25 {
26 namespace cpu
27 {
28 namespace ops
29 {
30
31 void DepthwiseConvolutionLayer::convFloat32()
32 {
33   float output_activation_min = 0, output_activation_max = 0;
34   CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
35
36   nnfw::cker::DepthwiseConvParams op_params;
37   op_params.stride_width = _strideWidth;
38   op_params.stride_height = _strideHeight;
39   op_params.dilation_width_factor = _dilationWidth;
40   op_params.dilation_height_factor = _dilationHeight;
41   op_params.padding_values.width = _paddingLeft;
42   op_params.padding_values.height = _paddingTop;
43   op_params.depth_multiplier = _multiplier;
44   op_params.float_activation_min = output_activation_min;
45   op_params.float_activation_max = output_activation_max;
46
47   nnfw::cker::DepthwiseConv<float, float>(
48     op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
49     getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
50     getBuffer<float>(_output), _external_context->ruy_context());
51 }
52
53 void DepthwiseConvolutionLayer::convQ8uPerTensor()
54 {
55   int32_t output_activation_min = 0;
56   int32_t output_activation_max = 0;
57   CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
58                                     &output_activation_max);
59
60   double real_multiplier = 0.0;
61   int32_t output_multiplier = 0;
62   int32_t output_shift = 0;
63   GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
64   QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
65
66   nnfw::cker::DepthwiseConvParams op_params;
67   op_params.stride_width = _strideWidth;
68   op_params.stride_height = _strideHeight;
69   op_params.dilation_width_factor = _dilationWidth;
70   op_params.dilation_height_factor = _dilationHeight;
71   op_params.padding_values.width = _paddingLeft;
72   op_params.padding_values.height = _paddingTop;
73   op_params.depth_multiplier = _multiplier;
74   op_params.input_offset = -_input->data_zero_point();
75   op_params.weights_offset = -_kernel->data_zero_point();
76   op_params.output_offset = _output->data_zero_point();
77   op_params.output_multiplier = output_multiplier;
78   op_params.output_shift = output_shift;
79   op_params.quantized_activation_min = output_activation_min;
80   op_params.quantized_activation_max = output_activation_max;
81
82   nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
83     op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
84     getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
85     getBuffer<uint8_t>(_output), _external_context->ruy_context());
86 }
87
88 void DepthwiseConvolutionLayer::convQ8uPerChannel()
89 {
90   nnfw::cker::DepthwiseConvParams op_params;
91   op_params.padding_values.width = _paddingLeft;
92   op_params.padding_values.height = _paddingTop;
93   op_params.stride_width = _strideWidth;
94   op_params.stride_height = _strideHeight;
95   op_params.dilation_width_factor = _dilationWidth;
96   op_params.dilation_height_factor = _dilationHeight;
97   op_params.depth_multiplier = _multiplier;
98   op_params.input_offset = -_input->data_zero_point();
99   op_params.output_offset = _output->data_zero_point();
100   int32_t output_activation_min = 0;
101   int32_t output_activation_max = 0;
102   CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
103                                     &output_activation_max);
104   op_params.quantized_activation_min = output_activation_min;
105   op_params.quantized_activation_max = output_activation_max;
106   // NOTE: The following fields of ConvParams are not used:
107   // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
108
109   nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
110     op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
111     getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
112     _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
113     getShape(_output), getBuffer<uint8_t>(_output));
114 }
115
116 void DepthwiseConvolutionLayer::convQ8i()
117 {
118   if (!_prepared)
119   {
120     prepareQ8i();
121     _prepared = true;
122   }
123
124   int32_t output_activation_min = 0;
125   int32_t output_activation_max = 0;
126   CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
127                                     &output_activation_max);
128
129   nnfw::cker::DepthwiseConvParams op_params;
130   op_params.padding_type = nnfw::cker::PaddingType::kSame;
131   op_params.padding_values.width = _paddingLeft;
132   op_params.padding_values.height = _paddingTop;
133   op_params.depth_multiplier = _multiplier;
134   op_params.stride_width = _strideWidth;
135   op_params.stride_height = _strideHeight;
136   op_params.dilation_width_factor = _dilationWidth;
137   op_params.dilation_height_factor = _dilationHeight;
138   op_params.input_offset = -_input->data_zero_point();
139   op_params.weights_offset = 0;
140   op_params.output_offset = _output->data_zero_point();
141   op_params.quantized_activation_min = output_activation_min;
142   op_params.quantized_activation_max = output_activation_max;
143
144   nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
145     op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
146     getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
147     getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
148     _external_context->ruy_context());
149 }
150
// Hybrid kernel: quantizes the float input to int8 batch-by-batch at run time,
// then convolves it against the per-channel int8 weights; the cker kernel
// produces float output (scaled by the per-batch factors passed in).
void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
{
  if (!_prepared)
  {
    // Lazily allocate the quantization scratch buffers (dynamic-shape path).
    prepareQ8iHybridPerChannel();
    _prepared = true;
  }

  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  // Treat the input as batch_size rows of input_size contiguous elements.
  auto input_shape = getShape(_input);
  const int batch_size = input_shape.Dims(0);
  const int input_size = input_shape.FlatSize() / batch_size;

  auto scaling_factors_ptr = _input_scaling_factors.data();
  auto input_offsets_ptr = _input_offsets.data();

  // Asymmetric-quantize each batch independently, recording one scale and one
  // zero-point per batch for the kernel to undo afterwards.
  // NOTE(review): prepareQ8iHybridPerChannel() sizes _input_quantized to a
  // single batch (input_size), yet this loop writes at offset b * input_size —
  // looks like an out-of-bounds write when batch_size > 1; confirm.
  for (int b = 0; b < batch_size; ++b)
  {
    const int offset = b * input_size;
    nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
                                                 _input_quantized.data() + offset,
                                                 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
  }

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
    op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
    getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
    getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
    _input_offsets.data());
}
194
195 void DepthwiseConvolutionLayer::prepareQ8i()
196 {
197   GetQuantizedConvolutionMultipliersAndShifts(
198     _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
199     _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
200     _per_channel_output_shift);
201 }
202
203 void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
204 {
205   GetQuantizedConvolutionMultipliersAndShifts(
206     _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
207     _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
208     _per_channel_output_shift);
209 }
210
211 void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
212 {
213   // allocate memory for activation quantization.
214   // - quantized values (int8_t type and same shape of original input)
215   // - quantization params (= scale/zeropoint for each input)
216   auto input_shape = getShape(_input);
217   const int batch_size = input_shape.Dims(0);
218   const int input_size = input_shape.FlatSize() / batch_size;
219   _input_quantized.resize(input_size);
220   // TODO: Optimize the case of batch_size = 1
221   _input_scaling_factors.resize(batch_size);
222   _input_offsets.resize(batch_size);
223 }
224
225 void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
226 {
227   // ensure weight is per-channel quantized.
228   int32_t kernel_input_channel = getShape(_kernel).Dims(3);
229   // zero_points comes from flatbuffer vector. Its size is within uint32_t range.
230   size_t kernel_zerop_cnt = _kernel->data_scales().size();
231   // promote to int64_t to compare int32_t and uint32_t
232   if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
233     throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
234 }
235
// Stores the layer configuration and, when shapes are static and the weight is
// constant, eagerly precomputes quantization parameters so run() can skip the
// prepare step.
void DepthwiseConvolutionLayer::configure(
  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
  const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
  const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
  const ir::Activation activation, IPortableTensor *output,
  const std::shared_ptr<ExternalContext> &external_context)
{
  _input = input;
  _kernel = kernel;
  _bias = bias;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _multiplier = multiplier;
  _dilationWidth = dilationWidth;
  _dilationHeight = dilationHeight;
  _activation = activation;
  _output = output;
  _external_context = external_context;
  // Hybrid mode: float input convolved against symmetric int8 weights.
  _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
               _kernel->data_type() == OperandType::QUANT_INT8_SYMM;

  if (_is_hybrid)
  {
    // Hybrid requires per-channel quantized weight; allocate the
    // quantization scratch buffers up front.
    ensureQ8iHybridPerChannel();
    prepareQ8iHybridPerChannel();
    _prepared = true;
  }
  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
  {
    // Static shapes + constant weight: multipliers/shifts can be computed once
    // here; otherwise convQ8i() computes them lazily on first run.
    if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
    {
      prepareQ8i();
      _prepared = true;
    }
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
           !_input->is_dynamic() && !_output->is_dynamic())
  {
    // More than one kernel scale means the uint8 weight is per-channel
    // quantized; only that variant needs precomputed multipliers.
    const bool per_channel_quantized = _kernel->data_scales().size() > 1;
    if (per_channel_quantized)
    {
      prepareQ8uPerChannel();
      _prepared = true;
    }
  }
}
287
288 void DepthwiseConvolutionLayer::run()
289 {
290   if (_is_hybrid)
291   {
292     convQ8iHybridPerChannel();
293   }
294   else if (_input->data_type() == OperandType::FLOAT32)
295   {
296     convFloat32();
297   }
298   else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
299   {
300     const bool per_channel_quantized = _kernel->data_scales().size() > 1;
301     if (per_channel_quantized)
302       convQ8uPerChannel();
303     else
304       convQ8uPerTensor();
305   }
306   else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
307   {
308     convQ8i();
309   }
310   else
311   {
312     throw std::runtime_error{"DepthwiseConv: unsupported data type"};
313   }
314 }
315
316 } // namespace ops
317 } // namespace cpu
318 } // namespace backend
319 } // namespace onert