/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "FullyConnectedLayer.h"
19 #include "../Tensor.h"
20 #include <cker/operation/FullyConnected.h>
21 #include <cker/TensorUtils.h>
22 #include <misc/polymorphic_downcast.h>
33 FullyConnectedLayer::FullyConnectedLayer()
34 : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
35 _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
36 _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
41 FullyConnectedLayer::~FullyConnectedLayer() = default;
43 void FullyConnectedLayer::fullyConnectedFloat32()
45 nnfw::cker::FullyConnectedParams op_params;
46 op_params.activation = convertActivationType(_activation);
48 nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<float>(_input),
49 getShape(_weights), getBuffer<float>(_weights), getShape(_bias),
50 _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
51 getBuffer<float>(_output));
54 // executionMutex is used to protect concurrent access of non-threadsafe resources
55 // like gemmlowp::GemmContext.
56 void FullyConnectedLayer::fullyConnectedQuant8()
58 double real_multiplier = 0.0;
59 int32_t output_multiplier = 0;
60 int32_t output_shift = 0;
61 int32_t output_activation_min = 0;
62 int32_t output_activation_max = 0;
63 GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
64 QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
65 CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
66 &output_activation_max);
68 nnfw::cker::FullyConnectedParams op_params;
69 op_params.input_offset = -_input->data_zero_point();
70 op_params.weights_offset = -_weights->data_zero_point();
71 op_params.output_offset = _output->data_zero_point();
72 op_params.output_multiplier = output_multiplier;
73 op_params.output_shift = output_shift;
74 op_params.quantized_activation_min = output_activation_min;
75 op_params.quantized_activation_max = output_activation_max;
77 nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<uint8_t>(_input),
78 getShape(_weights), getBuffer<uint8_t>(_weights), getShape(_bias),
79 _bias ? getBuffer<int32_t>(_bias) : nullptr, getShape(_output),
80 getBuffer<uint8_t>(_output));
83 void FullyConnectedLayer::fullyConnectedHybrid()
// Hybrid path: float32 activations with QUANT_INT8_SYMM weights; the cker kernel
// quantizes the input on the fly using the scratch buffers in _temp_arena.
85 nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
86 if (!temp_arena.prepared)
// Lazily size the scratch arena once for the current input/weight shapes.
88 temp_arena.prepare(getShape(_input), getShape(_weights));
91 nnfw::cker::FullyConnectedParams op_params;
92 op_params.activation = convertActivationType(_activation);
// weights_scale lets the kernel dequantize the symmetric-int8 weights.
93 op_params.weights_scale = _weights->data_scale();
// NOTE(review): the two kernel invocations below appear back-to-back; in the
// upstream file they are the two arms of a `#ifndef USE_RUY_GEMV` / `#else`
// preprocessor conditional that is missing from this extract — confirm.
96 nnfw::cker::FullyConnectedHybrid(
97 op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
98 getBuffer<int8_t>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
99 getShape(_output), getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
// Ruy-GEMV variant: prefer the prepack-cache key buffer when one was registered
// in prepare(); otherwise fall back to the live weight buffer.
101 nnfw::cker::FullyConnectedHybrid(
102 op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
103 (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
104 : getBuffer<int8_t>(_weights),
105 getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
106 getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
// Weight-freeing only applies when ruy holds a prepacked copy of the weights.
// NOTE(review): the guarded statement (presumably `return;`) is not visible in
// this extract — confirm.
108 if (_cached_weights == nullptr || _is_weights_freed)
111 // '_cached_weights is not nullptr and _is_weights_freed is false' means
112 // this weight shape is satisfied with the ruy kernel's prepack cache's condition.
113 // After entering here, it will not enter again except below the case - input is zero-vector
115 // if input's elements are filled with zero, it by-passes(does not enter ruy-kernel path)
116 // so that handle this case
117 const int input_size = getShape(_input).FlatSize();
118 if (nnfw::cker::IsZeroVector(getBuffer<float>(_input), input_size))
// Downcast to the CPU backend's concrete Tensor so the refcount can be touched.
121 auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
123 // This weight tensor could be other ops' const tensor.
124 // Therefore, below reference should be checked like following
125 auto tensor = const_cast<Tensor *>(weight_tensor);
126 if (tensor->buffer() == nullptr) // ref is already 0?
128 _is_weights_freed = true;
// Drop our reference; when the count reaches zero the buffer is released.
132 tensor->decrease_ref();
133 if (tensor->buffer() == nullptr) // ref == 0?
135 #if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
136 // NOTE This line forces OS to release any unused memory immediately
// NOTE(review): the statement itself (presumably `mallopt(M_PURGE, 0);`) is
// missing from this extract — confirm against upstream.
139 _is_weights_freed = true;
144 void FullyConnectedLayer::fullyConnectedSparseWeight()
146 nnfw::cker::FullyConnectedParams op_params;
147 op_params.activation = convertActivationType(_activation);
149 const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
150 const uint16_t *w1_indices = _weights->sparsity()->w1_indices();
152 auto block_size = _weights->sparsity()->block_size();
153 if (block_size.size() == 0)
155 nnfw::cker::FullyConnectedSparseWeightRandom(
156 op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
157 getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
158 getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
160 else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
162 nnfw::cker::FullyConnectedSparseWeight16x1(
163 op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
164 getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
165 getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
168 throw std::runtime_error{"FullyConnected: unsupported sparsity"};
171 void FullyConnectedLayer::fullyConnected16x1Float32()
173 #if defined(__aarch64__) && defined(USE_NEON)
174 float output_activation_min = 0, output_activation_max = 0;
175 CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
177 nnfw::cker::FullyConnectedParams op_params;
178 op_params.activation = convertActivationType(_activation);
180 nnfw::cker::FullyConnected16x1Float32(op_params, getShape(_input), getBuffer<float>(_input),
181 getShape(_weights), getBuffer<float>(_weights),
182 getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
183 getShape(_output), getBuffer<float>(_output));
185 throw std::runtime_error{"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
189 void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
190 const IPortableTensor *bias, ir::Activation activation,
191 ir::FullyConnectedWeightsFormat weights_format,
192 IPortableTensor *output,
193 const std::shared_ptr<ExternalContext> &external_context)
// Binds operand tensors and precomputes the dispatch flags consumed by run().
// NOTE(review): assignments of _input/_weights/_bias/_output from the
// parameters are not visible in this extract — confirm they exist upstream.
198 _activation = activation;
// Hybrid = float32 activations paired with symmetric-int8 quantized weights.
200 _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
201 weights->data_type() == OperandType::QUANT_INT8_SYMM;
202 _is_shuffled16x1float32 = weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32;
// The shuffled 16x1 float32 kernel only exists on aarch64 + NEON builds;
// reject the format eagerly on every other target.
203 #if !defined(__aarch64__) || !defined(USE_NEON)
204 if (_is_shuffled16x1float32)
206 throw std::runtime_error{
207 "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
210 _external_context = external_context;
213 void FullyConnectedLayer::run()
217 fullyConnectedHybrid();
219 else if (_weights->sparsity())
221 fullyConnectedSparseWeight();
223 else if (_input->data_type() == OperandType::FLOAT32)
225 _is_shuffled16x1float32 ? fullyConnected16x1Float32() : fullyConnectedFloat32();
227 else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
229 fullyConnectedQuant8();
233 throw std::runtime_error{"FullyConnected: unsupported data type"};
237 void FullyConnectedLayer::prepare()
// One-time preparation: drop an all-zero constant bias, and on ARM ruy-GEMV
// builds register the weight buffer for ruy's prepack cache.
239 if (_bias && _bias->is_constant())
241 const int bias_size = getShape(_bias).FlatSize();
// NOTE(review): the branch body is missing from this extract — upstream
// presumably clears _bias (sets it to nullptr) when it is all zeros; confirm.
242 if (nnfw::cker::IsZeroVector(getBuffer<float>(_bias), bias_size))
248 #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
249 // TODO This is workaround
250 // The only fc hybrid will use ruy kernel
// Only the hybrid path (float32 input + symmetric-int8 weights) uses ruy GEMV,
// so caching is pointless for any other type combination.
251 if (_input->data_type() != OperandType::FLOAT32 ||
252 _weights->data_type() != OperandType::QUANT_INT8_SYMM)
257 // NOTE. The condition to enable caching on ruy kernel can be changed according to ruy's version
259 // If input is dynamic, it changes total size of input
260 // If weights is not constant, weights cannot be cached
261 if (_input->is_dynamic() || !_weights->is_constant())
264 const int rows = getShape(_weights).Dims(0);
// NOTE(review): the condition consuming `rows` (which gates the caching below)
// is not visible in this extract — confirm it against upstream.
267 // TODO If it's possible to extract precaching from ruy kernel,
268 // place this instead of below code
270 // buffer will be used by ruy kernel as a cache key
271 _cached_weights = _weights->buffer();
278 } // namespace backend