runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc

   1 /*
   2  * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "FullyConnectedLayer.h"
  18
  19 #include "../Tensor.h"
  20 #include <cker/operation/FullyConnected.h>
  21 #include <cker/TensorUtils.h>
  22 #include <misc/polymorphic_downcast.h>
  23
  24 namespace onert
  25 {
  26 namespace backend
  27 {
  28 namespace cpu
  29 {
  30 namespace ops
  31 {
  32
  33 FullyConnectedLayer::FullyConnectedLayer()
  34     : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
  35       _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
  36       _external_context(nullptr), _is_hybrid(false)
  37 {
  38   // DO NOTHING
  39 }
  40
  41 FullyConnectedLayer::~FullyConnectedLayer() = default;
  42
  43 void FullyConnectedLayer::fullyConnectedFloat32()
  44 {
  45   float output_activation_min = 0, output_activation_max = 0;
  46   CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
  47
  48   nnfw::cker::FullyConnectedParams op_params;
  49   op_params.float_activation_min = output_activation_min;
  50   op_params.float_activation_max = output_activation_max;
  51   op_params.activation = convertActivationType(_activation);
  52
  53   nnfw::cker::FullyConnected(
  54       op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
  55       getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
  56       getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
  57       getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
  58 }
  59
  60 // executionMutex is used to protect concurrent access of non-threadsafe resources
  61 // like gemmlowp::GemmContext.
  62 void FullyConnectedLayer::fullyConnectedQuant8()
  63 {
  64   double real_multiplier = 0.0;
  65   int32_t output_multiplier = 0;
  66   int32_t output_shift = 0;
  67   int32_t output_activation_min = 0;
  68   int32_t output_activation_max = 0;
  69   GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
  70   QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
  71   CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
  72                                 &output_activation_max);
  73
  74   nnfw::cker::FullyConnectedParams op_params;
  75   op_params.input_offset = -_input->data_offset();
  76   op_params.weights_offset = -_weights->data_offset();
  77   op_params.output_offset = _output->data_offset();
  78   op_params.output_multiplier = output_multiplier;
  79   op_params.output_shift = output_shift;
  80   op_params.quantized_activation_min = output_activation_min;
  81   op_params.quantized_activation_max = output_activation_max;
  82
  83   nnfw::cker::FullyConnected(
  84       op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
  85       getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
  86       getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
  87       getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
  88 }
  89
  90 void FullyConnectedLayer::fullyConnectedHybrid()
  91 {
  92   nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
  93   if (!temp_arena.prepared)
  94   {
  95     temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
  96   }
  97
  98   nnfw::cker::FullyConnectedParams op_params;
  99   op_params.activation = convertActivationType(_activation);
 100   op_params.weights_scale = _weights->data_scale();
 101
 102 #ifndef USE_RUY_GEMV
 103   nnfw::cker::FullyConnectedHybrid(
 104       op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
 105       getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
 106       getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
 107       getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
 108       _external_context->ruy_context());
 109 #else
 110   nnfw::cker::FullyConnectedHybrid(
 111       op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
 112       getTensorShape(_weights),
 113       (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
 114                         : reinterpret_cast<const int8_t *>(_weights->buffer()),
 115       getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
 116       getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
 117       _external_context->ruy_context());
 118
 119   if (_cached_weights == nullptr || _is_weights_freed)
 120     return;
 121
 122   // '_cached_weights is not nullptr and _is_weights_freed is false' means
 123   // this weight shape is satisfied with the ruy kernel's prepack cache's condition.
 124   // After entering here, it will not enter again except below the case - input is zero-vector
 125
 126   // if input's elements are filled with zero, it by-passes(does not enter ruy-kernel path)
 127   // so that handle this case
 128   const int input_size = getTensorShape(_input).FlatSize();
 129   if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
 130     return;
 131
 132   auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
 133
 134   // This weight tensor could be other ops' const tensor.
 135   // Therefore, below reference should be checked like following
 136   auto tensor = const_cast<Tensor *>(weight_tensor);
 137   if (tensor->buffer() == nullptr) // ref is already 0?
 138   {
 139     _is_weights_freed = true;
 140     return;
 141   }
 142
 143   tensor->decrease_ref();
 144   if (tensor->buffer() == nullptr) // ref == 0?
 145   {
 146     _is_weights_freed = true;
 147   }
 148 #endif
 149 }
 150
 151 void FullyConnectedLayer::fullyConnectedSparseWeight()
 152 {
 153   float output_activation_min = 0, output_activation_max = 0;
 154   CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
 155
 156   nnfw::cker::FullyConnectedParams op_params;
 157   op_params.float_activation_min = output_activation_min;
 158   op_params.float_activation_max = output_activation_max;
 159   op_params.activation = convertActivationType(_activation);
 160
 161   int w0_size = getTensorShape(_weights).Dims(0);
 162   const uint16_t *w1_segments = _weights->w1_segments();
 163   const uint16_t *w1_indices = _weights->w1_indices();
 164
 165   nnfw::cker::FullyConnectedSparseWeight(
 166       op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
 167       getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
 168       getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
 169       getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
 170       w1_indices);
 171 }
 172
 173 void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
 174                                     const IPortableTensor *bias, ir::Activation activation,
 175                                     IPortableTensor *output,
 176                                     const std::shared_ptr<ExternalContext> &external_context)
 177 {
 178   _input = input;
 179   _weights = weights;
 180   _bias = bias;
 181   _activation = activation;
 182   _output = output;
 183   _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
 184                weights->data_type() == OperandType::QUANT_INT8_SYMM;
 185   _external_context = external_context;
 186 }
 187
 188 void FullyConnectedLayer::run()
 189 {
 190   if (_is_hybrid)
 191   {
 192     fullyConnectedHybrid();
 193   }
 194   else if (_weights->is_sparse())
 195   {
 196     fullyConnectedSparseWeight();
 197   }
 198   else if (_input->data_type() == OperandType::FLOAT32)
 199   {
 200     fullyConnectedFloat32();
 201   }
 202   else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
 203   {
 204     fullyConnectedQuant8();
 205   }
 206   else
 207   {
 208     throw std::runtime_error{"FullyConnected: unsupported data type"};
 209   }
 210 }
 211
 212 void FullyConnectedLayer::prepare()
 213 {
 214   if (_bias && _bias->is_constant())
 215   {
 216     const int bias_size = getTensorShape(_bias).FlatSize();
 217     if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
 218     {
 219       _bias = nullptr;
 220     }
 221   }
 222
 223 #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
 224   // TODO This is workaround
 225   // The only fc hybrid will use ruy kernel
 226   if (_input->data_type() != OperandType::FLOAT32 ||
 227       _weights->data_type() != OperandType::QUANT_INT8_SYMM)
 228   {
 229     return;
 230   }
 231
 232   // NOTE. The condition to enable caching on ruy kernel can be changed according to ruy's version
 233
 234   // If input is dynamic, it changes total size of input
 235   // If weights is not constant, weights cannot be cached
 236   if (_input->is_dynamic() || !_weights->is_constant())
 237     return;
 238
 239   const int rows = getTensorShape(_weights).Dims(0);
 240   if (rows % 4 == 0)
 241   {
 242     const int total_input_size = getTensorShape(_input).FlatSize();
 243     const int input_size = getTensorShape(_weights).Dims(1);
 244     const int batch_size = total_input_size / input_size;
 245     if (batch_size <= 4)
 246     {
 247       // TODO If it's possible to extract precaching from ruy kernel,
 248       // place this instead of below code
 249
 250       // buffer will be used by ruy kernel as a cache key
 251       _cached_weights = _weights->buffer();
 252     }
 253   }
 254 #endif
 255 }
 256
 257 } // namespace ops
 258 } // namespace cpu
 259 } // namespace backend
 260 } // namespace onert