2 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "ErrorApproximator.h"
23 #include <luci/IR/CircleNode.h>
// Per-element visitor used by iterate_per_channel: receives the 4-D index
// array, the canonicalized weight tensor shape, and the channel dim index.
29 using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
// Returns true iff the node carries quantization parameters with non-empty
// min and max vectors.
31 inline bool has_min_max(const CircleNode *node)
33 return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
// Computes the flat row-major offset of `indices` (length 4) into a rank-4
// tensor of shape `dimension`.
36 inline uint32_t cal_offset(const loco::TensorShape &dimension, uint32_t *indices)
38 return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
39 dimension.dim(3).value() +
40 indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
41 indices[2] * dimension.dim(3).value() + indices[3];
// Returns the index of the output-channel dimension in the node's weight
// tensor — the dimension along which per-channel min/max are gathered.
// Throws std::runtime_error for opcodes whose weight layout is unknown.
44 uint32_t get_channel_dim_index(const CircleNode *node)
47 auto opcode = node->opcode();
// CONV_2D / TRANSPOSE_CONV / FULLY_CONNECTED share one channel dim;
// DEPTHWISE_CONV_2D uses a different one (returned values are in the
// case bodies below).
50 case CircleOpcode::CONV_2D:
51 case CircleOpcode::TRANSPOSE_CONV:
52 case CircleOpcode::FULLY_CONNECTED:
55 case CircleOpcode::DEPTHWISE_CONV_2D:
// Fail loudly for any other op rather than guessing a layout.
59 throw std::runtime_error("Failed to find channel index in " + node->name());
// Fills `dimension` with a canonical rank-4 shape of the node's weights so
// that all supported ops can be iterated uniformly by iterate_per_channel.
// FULLY_CONNECTED weights [out, in] are remapped to [out, 1, 1, in] to look
// like a convolution kernel.
65 bool set_weight_dim(const CircleNode *node, const CircleConst *weights,
66 loco::TensorShape &dimension)
68 auto opcode = node->opcode()!
71 case CircleOpcode::CONV_2D:
72 case CircleOpcode::TRANSPOSE_CONV:
73 case CircleOpcode::DEPTHWISE_CONV_2D:
// 4-D kernel: copy each dimension verbatim.
74 assert(node->rank() == 4);
75 dimension.rank(node->rank());
76 dimension.dim(0).set(weights->dim(0).value());
77 dimension.dim(1).set(weights->dim(1).value());
78 dimension.dim(2).set(weights->dim(2).value());
79 dimension.dim(3).set(weights->dim(3).value());
81 case CircleOpcode::FULLY_CONNECTED:
82 assert(node->rank() == 2);
// Insert two unit dimensions so FC weights mimic a conv kernel layout.
84 dimension.dim(0).set(weights->dim(0).value());
85 dimension.dim(1).set(1); // Set FC layer like CONV
86 dimension.dim(2).set(1);
87 dimension.dim(3).set(weights->dim(1).value());
// Returns the weight input of a conv-like node (filter for CONV_2D /
// DEPTHWISE_CONV_2D / TRANSPOSE_CONV, weights for FULLY_CONNECTED);
// `weight` stays nullptr for unsupported opcodes.
96 loco::Node *get_weight(const CircleNode *node)
98 loco::Node *weight = nullptr;
99 auto opcode = node->opcode();
102 case CircleOpcode::CONV_2D:
104 auto conv = loco::must_cast<const CircleConv2D *>(node);
105 weight = conv->filter();
108 case CircleOpcode::DEPTHWISE_CONV_2D:
110 auto dconv = loco::must_cast<const CircleDepthwiseConv2D *>(node);
111 weight = dconv->filter();
114 case CircleOpcode::TRANSPOSE_CONV:
116 auto tconv = loco::must_cast<const CircleTransposeConv *>(node);
117 weight = tconv->filter();
120 case CircleOpcode::FULLY_CONNECTED:
122 auto fc = loco::must_cast<const CircleFullyConnected *>(node);
123 weight = fc->weights();
// Returns the node's weights as a CircleConst; throws if the weights are not
// constant, since the approximation below must read the actual values.
133 inline CircleConst *get_constant_weight(const CircleNode *node)
135 CircleConst *weight = dynamic_cast<CircleConst *>(get_weight(node));
136 if (weight == nullptr)
138 throw std::runtime_error("Unsupported non-constant weights in convolution node " +
// Walks every element of the node's (canonicalized rank-4) weight tensor and
// invokes `func(indices, dimension, channel_dim_index)` for each one.
145 void iterate_per_channel(const CircleNode *node, IterFunc func)
147 CircleConst *weight = get_constant_weight(node);
149 loco::TensorShape dimension;
150 set_weight_dim(node, weight, dimension);
151 uint32_t indices[4] = {
155 auto channel_dim_index = get_channel_dim_index(node);
// Full 4-level scan; `indices` doubles as the loop counters passed to func.
157 for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
159 for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
161 for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
163 for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
165 func(indices, dimension, channel_dim_index);
// Computes per-output-channel min/max of the node's constant float weights.
// `min`/`max` are indexed by channel; entries are first-written lazily, with
// has_min_max_value tracking which channels have been initialized.
172 void cal_minmax_per_channel(const CircleNode *node, std::vector<float> &min,
173 std::vector<float> &max)
175 CircleConst *weight = get_constant_weight(node);
177 loco::TensorShape dimension;
178 set_weight_dim(node, weight, dimension);
180 auto channel_dim_index = get_channel_dim_index(node);
181 auto size = dimension.dim(channel_dim_index).value();
183 std::vector<bool> has_min_max_value(size, false);
// Per-element visitor: fold each weight value into its channel's running
// min/max (weights are read as FLOAT32).
187 auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension,
188 uint32_t channel_dim_index) {
189 uint32_t channel_idx = indices[channel_dim_index];
190 auto data = weight->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
191 if (has_min_max_value[channel_idx])
193 min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
194 max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
// First value seen for this channel initializes both bounds.
198 min[channel_idx] = data;
199 max[channel_idx] = data;
200 has_min_max_value[channel_idx] = true;
204 iterate_per_channel(node, cal_minmax);
// Copies the node's shape into `shape`. Only succeeds when the node's shape
// status is VALID; dimensions are read as concrete uint32_t values.
207 bool get_shape(const CircleNode *circle_node, std::vector<uint32_t> &shape)
209 if (circle_node->shape_status() == ShapeStatus::VALID)
211 auto rank = circle_node->rank();
216 for (uint32_t i = 0; i < rank; i++)
218 shape[i] = circle_node->dim(i).value();
227 * @brief get_additions_per_channel computes W * H * CIN * KW * KH.
229 * W, H - width/height of OFM; KW, KH - convolution kernel width/height;
230 * CIN - number of channels in IFM (for depthwise it is one)
232 * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638
// Computes the number of additions contributing to one output channel:
// W * H (OFM spatial size) * KW * KH (kernel size) * CIN (skipped for
// depthwise, where each output channel reads a single input channel).
// Throws if the node's output shape is not available.
235 uint32_t get_additions_per_channel(const CircleNode *node)
237 uint32_t adds_per_channel = 1;
238 std::vector<uint32_t> ofm_shape;
239 if (!get_shape(node, ofm_shape)) // [BATCH, W, H, channels_out]
241 throw std::runtime_error("Failed to find correct shape " + node->name());
244 adds_per_channel *= ofm_shape[1] * ofm_shape[2]; // adds_per_channel *= W * H
246 auto weights = loco::must_cast<CircleNode *>(get_weight(node));
// Kernel contribution is only applied when the weight shape is known.
248 std::vector<uint32_t> w_shape;
249 if (get_shape(weights, w_shape)) // [channels_out, k_x, k_y, channels_in]
251 adds_per_channel *= (w_shape[1] * w_shape[2]); // adds_per_channel *= k_x * k_y
253 if (node->opcode() != CircleOpcode::DEPTHWISE_CONV_2D)
255 // for not depthwise convolutions we need to scale it by CIN
256 adds_per_channel *= w_shape[3]; // adds_per_channel *= c_in
260 return adds_per_channel;
// Fetches representative activation (IFM) min/max from the node's
// predecessors' quantization params. Only element [0] of each min/max vector
// is used; if several predecessors carry min/max, the last one visited wins.
263 void get_min_max_ifm_values(const CircleNode *node, float &ci_min, float &ci_max)
265 auto preds = loco::preds(node);
266 for (const auto &pred : preds)
268 auto parent_node = loco::must_cast<const luci::CircleNode *>(pred);
269 if (has_min_max(parent_node))
271 auto quantparam = parent_node->quantparam();
272 if (quantparam->min.size() > 0)
274 ci_min = quantparam->min[0];
275 ci_max = quantparam->max[0];
282 * @brief Return upper bound of quantization error for CONV, DCONV, TCONV.
285 * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638 for details.
// Estimates an upper bound of Q8 quantization error for a conv-like node as
// sum(W ranges) * max|A| + W_max * num_channels * (A range), scaled by the
// number of additions per channel. See the formula derivation in the
// header comment above.
287 float approximate_conv(const CircleNode *node)
289 float volume_W_A_err = 0.f;
291 // activation min-max values
294 get_min_max_ifm_values(node, ci_min, ci_max);
296 // channel-wise min, max
297 std::vector<float> min_values;
298 std::vector<float> max_values;
299 cal_minmax_per_channel(node, min_values, max_values);
300 assert(not min_values.empty());
301 assert(not max_values.empty());
303 // ranges = (max_values - min_values)
304 std::vector<float> ranges;
305 std::transform(max_values.begin(), max_values.end(), min_values.begin(),
306 std::back_inserter(ranges), std::minus<float>());
308 // maximal weight value across all channels
311 assert(max_values.size() == min_values.size());
312 for (size_t i = 0; i < max_values.size(); ++i)
314 w_max = std::max(w_max, std::abs(max_values[i]));
315 w_max = std::max(w_max, std::abs(min_values[i]));
319 // total weight quantization error across all channels
320 // so maximal error of quantization is ~ (max_value - min_value) / 255
321 // omitting 255 term we get that maximal error of quantization is just its range
323 for (auto cur_err : ranges)
328 uint32_t adds_per_channel = get_additions_per_channel(node);
329 uint32_t num_of_channels = ranges.size();
331 // maximal error introduced by weights quantization (for all channels)
332 volume_W_A_err = sum_err * std::max(::fabs(ci_max), ::fabs(ci_min));
333 // plus total error introduced by activation quantization (for all channels)
334 volume_W_A_err += w_max * num_of_channels * ::fabs(ci_max - ci_min);
335 // scale by volume of adds per channel
336 volume_W_A_err *= adds_per_channel;
337 // scale to get more readable output values
338 volume_W_A_err /= 1.e+6f;
341 return volume_W_A_err;
352 * How does Approximate work?
354 * Currently it works just for convolution layers, but may be generalized for other types as well.
355 * See discussion at https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598
356 * Convolution can be expressed as a matrix multiplication.
357 * While quantizing we introduce quantization error into convolution operand (activations) as well
358 * as into convolution weights. A_q * W_q = (A + q_err(A)) * (W + q_err(W)) = A * W + A * q_err(W) +
359 * W * q_err(A) + q_err(A) * q_err(W), assuming q_err(A) * q_err(W) are negligible as quadratic
360 * terms, we get A_q * W_q ~ A * W + A * q_err(W) + W * q_err(A) , q_err - quantization error,
361 * W - weight matrix, A - activations from previous layer (IFM), so quantization error of matrix
362 * multiplication can be approximated as A * q_err(W) + W * q_err(A). Estimating its upper bound
363 * we get A * q_err(W) + W * q_err(A) <=
364 * number_of_additions * (A_max * (W_max - W_min) / 255 + W_max * (A_max - A_min) / 255)
365 * The following code tries to get total error for quantizing convolution node into Q8.
366 * It's just a heuristic (metric sensitivity depends highly on derivatives as well).
// Entry point: returns the estimated Q8 quantization error upper bound for
// the node. Only convolution-like ops are approximated; everything else
// falls through to the default branch (see TODO).
368 float approximate(const CircleNode *node)
370 auto opcode = node->opcode();
374 case CircleOpcode::DEPTHWISE_CONV_2D:
375 case CircleOpcode::CONV_2D:
376 case CircleOpcode::TRANSPOSE_CONV:
377 qerror = approximate_conv(node);
379 default: // TODO (FULLY_CONNECTED e.g.)
386 } // namespace bisection
387 } // namespace mpqsolver