// runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "FullyConnectedLayer.h"

#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
#include <cker/TensorUtils.h>
#include <misc/polymorphic_downcast.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

FullyConnectedLayer::FullyConnectedLayer()
    : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
      _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
      _external_context(nullptr), _is_hybrid(false)
{
  // DO NOTHING
}

FullyConnectedLayer::~FullyConnectedLayer() = default;

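// Computes a dense float32 fully-connected layer:
//   output[batch, out] = activation(dot(input[batch, :], weights[out, :]) + bias[out])
// The bias pointer may be null (see the ternary below), in which case the
// bias addition is skipped.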
void FullyConnectedLayer::fullyConnectedFloat32()
{
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  op_params.activation = convertActivationType(_activation);

  nnfw::cker::FullyConnected(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
}

// executionMutex is used to protect concurrent access of non-threadsafe resources
// like gemmlowp::GemmContext.
void FullyConnectedLayer::fullyConnectedQuant8()
{
  double real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
                                &output_activation_max);

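  // The effective requantization scale is
  //   real_multiplier = input_scale * weights_scale / output_scale
  // which QuantizeMultiplier decomposes into a fixed-point multiplier and a
  // power-of-two shift, keeping the kernel in integer-only arithmetic.
  // cker expects the input and weights zero points negated, hence the minus
  // signs on the offsets below.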
  nnfw::cker::FullyConnectedParams op_params;
  op_params.input_offset = -_input->data_offset();
  op_params.weights_offset = -_weights->data_offset();
  op_params.output_offset = _output->data_offset();
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::FullyConnected(
      op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
}

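// Hybrid path: float32 activations with symmetric int8 quantized weights.
// The input is quantized on the fly, accumulation happens in the integer
// domain, and the result is rescaled back to float using weights_scale.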
void FullyConnectedLayer::fullyConnectedHybrid()
{
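  // The temp arena provides lazily-allocated scratch space for the hybrid
  // kernel (e.g. the quantized copy of the input and its per-batch scaling
  // factors); it is prepared once, on the first run, for the current shapes.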
  nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
  if (!temp_arena.prepared)
  {
    temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
  }

  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);
  op_params.weights_scale = _weights->data_scale();

#ifndef USE_RUY_GEMV
  nnfw::cker::FullyConnectedHybrid(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
      _external_context->ruy_context());
#else
  nnfw::cker::FullyConnectedHybrid(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights),
      (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
                        : reinterpret_cast<const int8_t *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
      _external_context->ruy_context());

  if (_cached_weights == nullptr || _is_weights_freed)
    return;

  // Reaching here means `_cached_weights` is not null and `_is_weights_freed`
  // is false, i.e. this weight shape satisfies the ruy kernel's prepack-cache
  // condition. After this point the code below runs again only in the
  // zero-vector input case handled next.

  // If the input is all zeros, cker bypasses the ruy kernel path entirely,
  // so the weights must be kept alive; return without releasing them.
  const int input_size = getTensorShape(_input).FlatSize();
  if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
    return;

  auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);

  // This weight tensor may also be another operation's constant tensor, so its
  // reference count has to be checked before releasing the buffer.
  auto tensor = const_cast<Tensor *>(weight_tensor);
  if (tensor->buffer() == nullptr) // already released (ref count reached 0)?
  {
    _is_weights_freed = true;
    return;
  }

  tensor->decrease_ref();
  if (tensor->buffer() == nullptr) // ref count reached 0?
  {
    _is_weights_freed = true;
  }
#endif
}

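// Sparse-weight path. w1_segments/w1_indices describe the nonzero pattern of
// the weight matrix in a CSR-like layout (row segments plus column indices).
// A dedicated kernel handles 16x1 block sparsity; anything else is rejected.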
void FullyConnectedLayer::fullyConnectedSparseWeight()
{
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  op_params.activation = convertActivationType(_activation);

  const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
  const uint16_t *w1_indices = _weights->sparsity()->w1_indices();

  auto block_size = _weights->sparsity()->block_size();
  if (block_size.size() == 0)
  {
    nnfw::cker::FullyConnectedSparseWeightRandom(
        op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
        getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
        getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
        w1_indices);
  }
  else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
  {
    nnfw::cker::FullyConnectedSparseWeight16x1(
        op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
        getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
        getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
        w1_indices);
  }
  else
    throw std::runtime_error{"FullyConnected: unsupported sparsity"};
}

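// Caches the operand pointers and the fused activation, and detects the
// hybrid case: a float32 input combined with symmetric int8 weights.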
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                    const IPortableTensor *bias, ir::Activation activation,
                                    IPortableTensor *output,
                                    const std::shared_ptr<ExternalContext> &external_context)
{
  _input = input;
  _weights = weights;
  _bias = bias;
  _activation = activation;
  _output = output;
  _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
               weights->data_type() == OperandType::QUANT_INT8_SYMM;
  _external_context = external_context;
}

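// Dispatches in priority order: hybrid quantization, then sparse weights,
// then the dense float32 and asymmetric uint8 paths.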
void FullyConnectedLayer::run()
{
  if (_is_hybrid)
  {
    fullyConnectedHybrid();
  }
  else if (_weights->sparsity())
  {
    fullyConnectedSparseWeight();
  }
  else if (_input->data_type() == OperandType::FLOAT32)
  {
    fullyConnectedFloat32();
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
  {
    fullyConnectedQuant8();
  }
  else
  {
    throw std::runtime_error{"FullyConnected: unsupported data type"};
  }
}

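// Drops a constant all-zero bias so the kernels can skip the bias addition.
// On ARM NEON builds with USE_RUY_GEMV, additionally caches the weight buffer
// pointer so the ruy kernel can reuse its prepacked weights across runs.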
void FullyConnectedLayer::prepare()
{
  if (_bias && _bias->is_constant())
  {
    const int bias_size = getTensorShape(_bias).FlatSize();
    if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
    {
      _bias = nullptr;
    }
  }

#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
  // TODO This is a workaround; only the hybrid fully-connected path uses the
  // ruy kernel.
  if (_input->data_type() != OperandType::FLOAT32 ||
      _weights->data_type() != OperandType::QUANT_INT8_SYMM)
  {
    return;
  }

  // NOTE The condition for enabling caching in the ruy kernel may change with
  // ruy's version.

  // A dynamic input changes the input's total size, and non-constant weights
  // cannot be cached.
  if (_input->is_dynamic() || !_weights->is_constant())
    return;

  const int rows = getTensorShape(_weights).Dims(0);
  if (rows % 4 == 0)
  {
    // TODO If precaching can be extracted from the ruy kernel, place it here
    // instead of the code below.

    // The buffer pointer is used by the ruy kernel as a cache key.
    _cached_weights = _weights->buffer();
  }
#endif
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert