// runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "FullyConnectedLayer.h"

#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
#include <cker/TensorUtils.h>
#include <misc/polymorphic_downcast.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

FullyConnectedLayer::FullyConnectedLayer()
    : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
      _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
      _external_context(nullptr), _is_hybrid(false)
{
  // DO NOTHING
}

FullyConnectedLayer::~FullyConnectedLayer() = default;

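// Computes a dense float32 fully-connected layer:
//   output[batch, out] = activation(dot(input[batch, :], weights[out, :]) + bias[out])
// The bias pointer may be null (see the ternary below), in which case the
// bias addition is skipped.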
void FullyConnectedLayer::fullyConnectedFloat32()
{
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  op_params.activation = convertActivationType(_activation);

  nnfw::cker::FullyConnected(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
}

// executionMutex is used to protect concurrent access of non-threadsafe resources
// like gemmlowp::GemmContext.
void FullyConnectedLayer::fullyConnectedQuant8()
{
  double real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
                                &output_activation_max);

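  // The effective requantization scale is
  //   real_multiplier = input_scale * weights_scale / output_scale
  // which QuantizeMultiplier decomposes into a fixed-point multiplier and a
  // power-of-two shift, keeping the kernel in integer-only arithmetic.
  // cker expects the input and weights zero points negated, hence the minus
  // signs on the offsets below.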
  nnfw::cker::FullyConnectedParams op_params;
  op_params.input_offset = -_input->data_offset();
  op_params.weights_offset = -_weights->data_offset();
  op_params.output_offset = _output->data_offset();
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::FullyConnected(
      op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
}

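// Hybrid path: float32 activations with symmetric int8 quantized weights.
// The input is quantized on the fly, accumulation happens in the integer
// domain, and the result is rescaled back to float using weights_scale.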
void FullyConnectedLayer::fullyConnectedHybrid()
{
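  // The temp arena provides lazily-allocated scratch space for the hybrid
  // kernel (e.g. the quantized copy of the input and its per-batch scaling
  // factors); it is prepared once, on the first run, for the current shapes.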
  nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
  if (!temp_arena.prepared)
  {
    temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
  }

  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);
  op_params.weights_scale = _weights->data_scale();

#ifndef USE_RUY_GEMV
  nnfw::cker::FullyConnectedHybrid(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
      _external_context->ruy_context());
#else
  nnfw::cker::FullyConnectedHybrid(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights),
      (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
                        : reinterpret_cast<const int8_t *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
      _external_context->ruy_context());

  if (_cached_weights == nullptr || _is_weights_freed)
    return;

  // Reaching here means `_cached_weights` is not null and `_is_weights_freed`
  // is false, i.e. this weight shape satisfies the ruy kernel's prepack-cache
  // condition. After this point the code below runs again only in the
  // zero-vector input case handled next.

  // If the input is all zeros, cker bypasses the ruy kernel path entirely,
  // so the weights must be kept alive; return without releasing them.
  const int input_size = getTensorShape(_input).FlatSize();
  if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
    return;

  auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);

  // This weight tensor may also be another operation's constant tensor, so its
  // reference count has to be checked before releasing the buffer.
  auto tensor = const_cast<Tensor *>(weight_tensor);
  if (tensor->buffer() == nullptr) // already released (ref count reached 0)?
  {
    _is_weights_freed = true;
    return;
  }

  tensor->decrease_ref();
  if (tensor->buffer() == nullptr) // ref count reached 0?
  {
    _is_weights_freed = true;
  }
#endif
}

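// Sparse-weight path. w1_segments/w1_indices describe the nonzero pattern of
// the weight matrix in a CSR-like layout (row segments plus column indices).
// A dedicated kernel handles 16x1 block sparsity; anything else is rejected.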
void FullyConnectedLayer::fullyConnectedSparseWeight()
{
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  op_params.activation = convertActivationType(_activation);

  const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
  const uint16_t *w1_indices = _weights->sparsity()->w1_indices();

  auto block_size = _weights->sparsity()->block_size();
  if (block_size.size() == 0)
  {
    nnfw::cker::FullyConnectedSparseWeightRandom(
        op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
        getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
        getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
        w1_indices);
  }
  else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
  {
    nnfw::cker::FullyConnectedSparseWeight16x1(
        op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
        getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
        getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
        w1_indices);
  }
  else
    throw std::runtime_error{"FullyConnected: unsupported sparsity"};
}

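// Caches the operand pointers and the fused activation, and detects the
// hybrid case: a float32 input combined with symmetric int8 weights.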
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                    const IPortableTensor *bias, ir::Activation activation,
                                    IPortableTensor *output,
                                    const std::shared_ptr<ExternalContext> &external_context)
{
  _input = input;
  _weights = weights;
  _bias = bias;
  _activation = activation;
  _output = output;
  _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
               weights->data_type() == OperandType::QUANT_INT8_SYMM;
  _external_context = external_context;
}

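// Dispatches in priority order: hybrid quantization, then sparse weights,
// then the dense float32 and asymmetric uint8 paths.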
void FullyConnectedLayer::run()
{
  if (_is_hybrid)
  {
    fullyConnectedHybrid();
  }
  else if (_weights->sparsity())
  {
    fullyConnectedSparseWeight();
  }
  else if (_input->data_type() == OperandType::FLOAT32)
  {
    fullyConnectedFloat32();
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
  {
    fullyConnectedQuant8();
  }
  else
  {
    throw std::runtime_error{"FullyConnected: unsupported data type"};
  }
}

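// Drops a constant all-zero bias so the kernels can skip the bias addition.
// On ARM NEON builds with USE_RUY_GEMV, additionally caches the weight buffer
// pointer so the ruy kernel can reuse its prepacked weights across runs.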
void FullyConnectedLayer::prepare()
{
  if (_bias && _bias->is_constant())
  {
    const int bias_size = getTensorShape(_bias).FlatSize();
    if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
    {
      _bias = nullptr;
    }
  }

#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
  // TODO This is a workaround; only the hybrid fully-connected path uses the
  // ruy kernel.
  if (_input->data_type() != OperandType::FLOAT32 ||
      _weights->data_type() != OperandType::QUANT_INT8_SYMM)
  {
    return;
  }

  // NOTE The condition for enabling caching in the ruy kernel may change with
  // ruy's version.

  // A dynamic input changes the input's total size, and non-constant weights
  // cannot be cached.
  if (_input->is_dynamic() || !_weights->is_constant())
    return;

  const int rows = getTensorShape(_weights).Dims(0);
  if (rows % 4 == 0)
  {
    // TODO If precaching can be extracted from the ruy kernel, place it here
    // instead of the code below.

    // The buffer pointer is used by the ruy kernel as a cache key.
    _cached_weights = _weights->buffer();
  }
#endif
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert