2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "FullyConnectedLayer.h"
19 #include "../Tensor.h"
20 #include <cker/operation/FullyConnected.h>
21 #include <cker/TensorUtils.h>
22 #include <misc/polymorphic_downcast.h>
// Default-constructs the layer with all operand pointers unset; the actual
// tensors are bound later via configure().  The cker scratch arena used by the
// hybrid (float-in / int8-weight) path is allocated eagerly here.
FullyConnectedLayer::FullyConnectedLayer()
  : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
    _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
    _external_context(nullptr), _is_hybrid(false)
// Defaulted out-of-line -- presumably so smart-pointer members (e.g. _temp_arena)
// can destroy a type that is incomplete in the header; confirm against
// FullyConnectedLayer.h before moving this inline.
FullyConnectedLayer::~FullyConnectedLayer() = default;
43 void FullyConnectedLayer::fullyConnectedFloat32()
45 float output_activation_min = 0, output_activation_max = 0;
46 CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
48 nnfw::cker::FullyConnectedParams op_params;
49 op_params.float_activation_min = output_activation_min;
50 op_params.float_activation_max = output_activation_max;
51 op_params.activation = convertActivationType(_activation);
53 nnfw::cker::FullyConnected(
54 op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
55 getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
56 getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
57 getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
60 // executionMutex is used to protect concurrent access of non-threadsafe resources
61 // like gemmlowp::GemmContext.
62 void FullyConnectedLayer::fullyConnectedQuant8()
64 double real_multiplier = 0.0;
65 int32_t output_multiplier = 0;
66 int32_t output_shift = 0;
67 int32_t output_activation_min = 0;
68 int32_t output_activation_max = 0;
69 GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
70 QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
71 CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
72 &output_activation_max);
74 nnfw::cker::FullyConnectedParams op_params;
75 op_params.input_offset = -_input->data_offset();
76 op_params.weights_offset = -_weights->data_offset();
77 op_params.output_offset = _output->data_offset();
78 op_params.output_multiplier = output_multiplier;
79 op_params.output_shift = output_shift;
80 op_params.quantized_activation_min = output_activation_min;
81 op_params.quantized_activation_max = output_activation_max;
83 nnfw::cker::FullyConnected(
84 op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
85 getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
86 getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
87 getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
// Hybrid path: float32 activations with QUANT_INT8_SYMM weights.  The input is
// quantized on the fly (scratch space in _temp_arena) and the int8 accumulation
// is rescaled back to float using the weights' scale.
void FullyConnectedLayer::fullyConnectedHybrid()
  nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
  // Lazily size the quantization scratch buffers on first run.
  if (!temp_arena.prepared)
    temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
  nnfw::cker::FullyConnectedParams op_params;
  op_params.activation = convertActivationType(_activation);
  // Scale used to dequantize the int8 matmul result back to float.
  op_params.weights_scale = _weights->data_scale();
  // NOTE(review): two consecutive FullyConnectedHybrid calls follow.  They are
  // presumably alternative branches of a USE_RUY_GEMV preprocessor conditional
  // whose directives are elided from this view -- confirm before editing.
  nnfw::cker::FullyConnectedHybrid(
    op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
    getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
    getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
    getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
    _external_context->ruy_context());
  // Ruy-GEMV variant: pass _cached_weights (captured in prepare()) so ruy can
  // reuse its prepacked weight cache across runs; falls back to the live buffer.
  nnfw::cker::FullyConnectedHybrid(
    op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
    getTensorShape(_weights),
    (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
                      : reinterpret_cast<const int8_t *>(_weights->buffer()),
    getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
    getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
    _external_context->ruy_context());
  // No cache in play, or the raw weights were already released: nothing to do.
  // (The branch body is elided from this view; presumably an early return.)
  if (_cached_weights == nullptr || _is_weights_freed)
  // '_cached_weights is not nullptr and _is_weights_freed is false' means
  // this weight shape is satisfied with the ruy kernel's prepack cache's condition.
  // After entering here, it will not enter again except below the case - input is zero-vector
  // if input's elements are filled with zero, it by-passes(does not enter ruy-kernel path)
  // so that handle this case
  const int input_size = getTensorShape(_input).FlatSize();
  // A zero input skipped the ruy kernel, so the weights were never prepacked;
  // keep the raw buffer alive for a later non-zero input.
  if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
  // Downcast to the concrete cpu backend Tensor to manipulate its ref count.
  auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
  // This weight tensor could be other ops' const tensor.
  // Therefore, below reference should be checked like following
  auto tensor = const_cast<Tensor *>(weight_tensor);
  if (tensor->buffer() == nullptr) // ref is already 0?
    _is_weights_freed = true;
  // Drop this op's reference: ruy now holds a prepacked copy, so the raw
  // weight buffer can be released once no other op needs it.
  tensor->decrease_ref();
  if (tensor->buffer() == nullptr) // ref == 0?
    _is_weights_freed = true;
// Float32 fully-connected with sparse weights described by segment offsets
// (w1_segments) and column indices (w1_indices).  Supports unstructured
// ("random") sparsity and 16x1 block sparsity; anything else throws.
void FullyConnectedLayer::fullyConnectedSparseWeight()
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
  nnfw::cker::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  op_params.activation = convertActivationType(_activation);
  // NOTE(review): _weights->sparsity() is dereferenced unchecked here; run()
  // only dispatches to this path when sparsity() is non-null.
  const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
  const uint16_t *w1_indices = _weights->sparsity()->w1_indices();
  auto block_size = _weights->sparsity()->block_size();
  // Empty block-size vector => unstructured sparsity.
  if (block_size.size() == 0)
    nnfw::cker::FullyConnectedSparseWeightRandom(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
  // 16x1 block sparsity has a dedicated, vectorization-friendly kernel.
  else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
    nnfw::cker::FullyConnectedSparseWeight16x1(
      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
      getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
  // NOTE(review): the trailing `w1_indices);` argument of both calls and the
  // `else` keyword appear elided from this view -- confirm upstream.
  throw std::runtime_error{"FullyConnected: unsupported sparsity"};
// Binds the operand tensors and records the fused activation.
// @param bias may be null (optional operand).
// NOTE(review): the assignments storing input/weights/bias/output into the
// corresponding members appear elided from this view -- confirm upstream.
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                    const IPortableTensor *bias, ir::Activation activation,
                                    IPortableTensor *output,
                                    const std::shared_ptr<ExternalContext> &external_context)
  _activation = activation;
  // Hybrid mode = float activations combined with symmetric-int8 weights.
  _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
               weights->data_type() == OperandType::QUANT_INT8_SYMM;
  _external_context = external_context;
// Dispatches to the kernel variant matching the configured operand types.
void FullyConnectedLayer::run()
  // NOTE(review): the leading branch condition is elided from this view --
  // presumably `if (_is_hybrid)` as computed in configure(); confirm.
  fullyConnectedHybrid();
  // Sparse weights take priority over the dense type-based dispatch below.
  else if (_weights->sparsity())
    fullyConnectedSparseWeight();
  else if (_input->data_type() == OperandType::FLOAT32)
    fullyConnectedFloat32();
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
    fullyConnectedQuant8();
  // Any other operand type combination is unsupported.
  throw std::runtime_error{"FullyConnected: unsupported data type"};
// One-time preparation before the first run(): drops a provably useless bias
// and, on ARM NEON + ruy-GEMV builds, records the weight buffer for ruy's
// prepack cache.
void FullyConnectedLayer::prepare()
  // A constant all-zero bias contributes nothing, so detect it here.
  // NOTE(review): the inner branch body is elided from this view -- presumably
  // it nulls out _bias; confirm.  Also note the buffer is read as float even
  // though a quantized model's bias would be int32 -- verify upstream intent.
  if (_bias && _bias->is_constant())
    const int bias_size = getTensorShape(_bias).FlatSize();
    if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
  // TODO This is workaround
  // The only fc hybrid will use ruy kernel
  // (branch body elided from this view; presumably an early return when the
  //  operand types are not the hybrid combination)
  if (_input->data_type() != OperandType::FLOAT32 ||
      _weights->data_type() != OperandType::QUANT_INT8_SYMM)
  // NOTE. The condition to enable caching on ruy kernel can be changed according to ruy's version
  // If input is dynamic, it changes total size of input
  // If weights is not constant, weights cannot be cached
  // (branch body elided; presumably an early return)
  if (_input->is_dynamic() || !_weights->is_constant())
  const int rows = getTensorShape(_weights).Dims(0);
  // NOTE(review): a row-count gate (e.g. `rows % 4 == 0`) appears elided
  // between here and the assignment below -- confirm upstream.
  // TODO If it's possible to extract precaching from ruy kernel,
  // place this instead of below code
  // buffer will be used by ruy kernel as a cache key
  _cached_weights = _weights->buffer();
267 } // namespace backend