runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "FullyConnectedLayer.h"

#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
#include <cker/TensorUtils.h>
#include <misc/polymorphic_downcast.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

FullyConnectedLayer::FullyConnectedLayer()
  : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
    _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
    _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
{
  // DO NOTHING
}

FullyConnectedLayer::~FullyConnectedLayer() = default;

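// Dense float32 path: forwards the operands to nnfw::cker::FullyConnected with the fused
// activation folded into op_params.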
void FullyConnectedLayer::fullyConnectedFloat32()
{
  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);

  nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<float>(_input),
                             getShape(_weights), getBuffer<float>(_weights), getShape(_bias),
                             _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
                             getBuffer<float>(_output));
}

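// Asymmetric uint8 quantized path. The effective output multiplier/shift and the clamped
// activation range are derived from the operands' quantization parameters before calling cker.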
// An execution mutex is used to protect concurrent access to non-thread-safe resources
// like gemmlowp::GemmContext.
void FullyConnectedLayer::fullyConnectedQuant8()
{
  double real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
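  // The input and weights zero points are passed negated, following the cker/TFLite offset
  // convention.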
  op_params.input_offset = -_input->data_zero_point();
  op_params.weights_offset = -_weights->data_zero_point();
  op_params.output_offset = _output->data_zero_point();
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<uint8_t>(_input),
                             getShape(_weights), getBuffer<uint8_t>(_weights), getShape(_bias),
                             _bias ? getBuffer<int32_t>(_bias) : nullptr, getShape(_output),
                             getBuffer<uint8_t>(_output));
}

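// Hybrid path: float32 activations with symmetric int8 weights. cker quantizes the float input
// on the fly (using _temp_arena as scratch space) and passes weights_scale so the integer
// accumulators can be rescaled back to float. With USE_RUY_GEMV, the constant weight buffer
// cached in prepare() is handed to ruy for prepacking and is released once it has been prepacked.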
void FullyConnectedLayer::fullyConnectedHybrid()
{
  nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
  if (!temp_arena.prepared)
  {
    temp_arena.prepare(getShape(_input), getShape(_weights));
  }

  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);
  op_params.weights_scale = _weights->data_scale();

#ifndef USE_RUY_GEMV
  nnfw::cker::FullyConnectedHybrid(
    op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
    getBuffer<int8_t>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
    getShape(_output), getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
#else
  nnfw::cker::FullyConnectedHybrid(
    op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
    (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
                      : getBuffer<int8_t>(_weights),
    getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
    getBuffer<float>(_output), temp_arena, _external_context->ruy_context());

  if (_cached_weights == nullptr || _is_weights_freed)
    return;

  // Reaching this point means '_cached_weights' is not nullptr and '_is_weights_freed' is false,
  // i.e. this weight shape satisfies the condition of the ruy kernel's prepack cache.
  // Once the weights are freed below, this code is not reached again. One exception is handled
  // first: if the input is a zero vector, the ruy kernel path is bypassed (no prepacking
  // happens), so return here without freeing the weights.
  const int input_size = getShape(_input).FlatSize();
  if (nnfw::cker::IsZeroVector(getBuffer<float>(_input), input_size))
    return;

  auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);

  // This weight tensor may also be used as a constant tensor by other ops,
  // so its reference count must be checked as follows.
  auto tensor = const_cast<Tensor *>(weight_tensor);
  if (tensor->buffer() == nullptr) // ref is already 0?
  {
    _is_weights_freed = true;
    return;
  }

  tensor->decrease_ref();
  if (tensor->buffer() == nullptr) // ref == 0?
  {
#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
    // NOTE This line forces the OS to release any unused memory immediately
    mallopt(M_PURGE, 0);
#endif
    _is_weights_freed = true;
  }
#endif
}

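// Sparse float32 weights path: the weights are stored in the compressed form described by the
// tensor's sparsity metadata (w1_segments/w1_indices). Only random sparsity and 16x1 block
// sparsity are supported.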
void FullyConnectedLayer::fullyConnectedSparseWeight()
{
  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);

  const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
  const uint16_t *w1_indices = _weights->sparsity()->w1_indices();

  auto block_size = _weights->sparsity()->block_size();
  if (block_size.size() == 0)
  {
    nnfw::cker::FullyConnectedSparseWeightRandom(
      op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
      getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
      getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
  }
  else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
  {
    nnfw::cker::FullyConnectedSparseWeight16x1(
      op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
      getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
      getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
  }
  else
    throw std::runtime_error{"FullyConnected: unsupported sparsity"};
}

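// Dense float32 path for the Shuffled16x1Float32 weights format; available only on aarch64
// builds with NEON enabled.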
void FullyConnectedLayer::fullyConnected16x1Float32()
{
#if defined(__aarch64__) && defined(USE_NEON)
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);

  nnfw::cker::FullyConnected16x1Float32(op_params, getShape(_input), getBuffer<float>(_input),
                                        getShape(_weights), getBuffer<float>(_weights),
                                        getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
                                        getShape(_output), getBuffer<float>(_output));
#else
  throw std::runtime_error{"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
#endif
}

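// Stores the operand pointers and attributes and decides, from the operand data types and the
// weights format, which kernel path run() will dispatch to.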
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                    const IPortableTensor *bias, ir::Activation activation,
                                    ir::FullyConnectedWeightsFormat weights_format,
                                    IPortableTensor *output,
                                    const std::shared_ptr<ExternalContext> &external_context)
{
  _input = input;
  _weights = weights;
  _bias = bias;
  _activation = activation;
  _output = output;
  _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
               weights->data_type() == OperandType::QUANT_INT8_SYMM;
  _is_shuffled16x1float32 = weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32;
#if !defined(__aarch64__) || !defined(USE_NEON)
  if (_is_shuffled16x1float32)
  {
    throw std::runtime_error{
      "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
  }
#endif
  _external_context = external_context;
}

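// Dispatches to the hybrid, sparse, float32 (dense or 16x1-shuffled), or uint8 quantized kernel
// depending on how the layer was configured.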
void FullyConnectedLayer::run()
{
  if (_is_hybrid)
  {
    fullyConnectedHybrid();
  }
  else if (_weights->sparsity())
  {
    fullyConnectedSparseWeight();
  }
  else if (_input->data_type() == OperandType::FLOAT32)
  {
    _is_shuffled16x1float32 ? fullyConnected16x1Float32() : fullyConnectedFloat32();
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
  {
    fullyConnectedQuant8();
  }
  else
  {
    throw std::runtime_error{"FullyConnected: unsupported data type"};
  }
}

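// One-time preparation: a constant all-zero bias is dropped, and (when ruy GEMV caching is
// enabled) the constant weight buffer is recorded so ruy can cache its prepacked form across
// runs.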
void FullyConnectedLayer::prepare()
{
  if (_bias && _bias->is_constant())
  {
    const int bias_size = getShape(_bias).FlatSize();
    if (nnfw::cker::IsZeroVector(getBuffer<float>(_bias), bias_size))
    {
      _bias = nullptr;
    }
  }

#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
  // TODO This is a workaround.
  // Only the hybrid fully-connected path uses the ruy kernel.
  if (_input->data_type() != OperandType::FLOAT32 ||
      _weights->data_type() != OperandType::QUANT_INT8_SYMM)
  {
    return;
  }

  // NOTE The condition for enabling caching in the ruy kernel may change with ruy's version.

  // If the input is dynamic, its total size can change between runs.
  // If the weights are not constant, they cannot be cached.
  if (_input->is_dynamic() || !_weights->is_constant())
    return;

  const int rows = getShape(_weights).Dims(0);
  if (rows % 4 == 0)
  {
    // TODO If precaching can be extracted from the ruy kernel, place it here
    // instead of the code below.

    // This buffer is used by the ruy kernel as a cache key.
    _cached_weights = _weights->buffer();
  }
#endif
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert