void NNInterpreter::visit(ops::AvgPool2DOp &op)
{
auto inputs = getInputTensors(op);
- auto outputs = AvgPool2D(inputs[0], op)();
+ std::vector<mir::TensorVariant> outputs;
+ switch (inputs[0].get().getElementType()) // choose the kernel from the input's element type
+ {
+ case mir::DataType::FLOAT32:
+ outputs = AvgPool2D(inputs[0], op)(); // float path: the functor that was used unconditionally before
+ break;
+ case mir::DataType::UINT8:
+ outputs = QuantizedAvgPool2D(op, inputs[0]); // UINT8 path: quantized kernel
+ break;
+ default:
+ throw std::runtime_error("NYI"); // every other element type is rejected
+ }
setOutputTensors(op, std::move(outputs));
}
void NNInterpreter::visit(ops::Conv2DOp &op)
{
auto inputs = getInputTensors(op);
- auto outputs = Conv2D(inputs[0], inputs[1], op)();
+ // Choose the kernel implementation from the element type of the data input.
+ std::vector<mir::TensorVariant> outputs;
+ switch (inputs[0].get().getElementType())
+ {
+ case mir::DataType::FLOAT32:
+ outputs = Conv2D(inputs[0], inputs[1], op)();
+ break;
+ case mir::DataType::UINT8:
+ // The quantized kernel reads inputs[2] as the bias. Check the input count at runtime:
+ // an assert() disappears when NDEBUG is defined and would leave an out-of-range access.
+ if (inputs.size() != 3)
+ throw std::runtime_error("Quantized Conv2D requires exactly 3 inputs (input, kernel, bias).");
+ outputs = QuantizedConv2D(op, inputs[0], inputs[1], inputs[2]);
+ break;
+ default:
+ throw std::runtime_error("NYI");
+ }
setOutputTensors(op, std::move(outputs));
}
void NNInterpreter::visit(ops::DepthwiseConv2DOp &op)
{
auto inputs = getInputTensors(op);
- auto outputs = DepthwiseConv2D(inputs[0], inputs[1], op)();
+ // Choose the kernel implementation from the element type of the data input.
+ std::vector<mir::TensorVariant> outputs;
+ switch (inputs[0].get().getElementType())
+ {
+ case mir::DataType::FLOAT32:
+ outputs = DepthwiseConv2D(inputs[0], inputs[1], op)();
+ break;
+ case mir::DataType::UINT8:
+ // The quantized kernel reads inputs[2] as the bias. Check the input count at runtime:
+ // an assert() disappears when NDEBUG is defined and would leave an out-of-range access.
+ if (inputs.size() != 3)
+ throw std::runtime_error(
+ "Quantized DepthwiseConv2D requires exactly 3 inputs (input, kernel, bias).");
+ outputs = QuantizedDepthwiseConv2D(op, inputs[0], inputs[1], inputs[2]);
+ break;
+ default:
+ throw std::runtime_error("NYI");
+ }
setOutputTensors(op, std::move(outputs));
}
void NNInterpreter::visit(ops::AddOp &op)
{
auto inputs = getInputTensors(op);
- auto outputs = Add(op, inputs[0], inputs[1]);
+ std::vector<mir::TensorVariant> outputs;
+ switch (inputs[0].get().getElementType()) // only the first operand's element type is inspected
+ {
+ case mir::DataType::FLOAT32:
+ outputs = Add(op, inputs[0], inputs[1]); // float path: the implementation used unconditionally before
+ break;
+ case mir::DataType::UINT8:
+ outputs = QuantizedAdd(op, inputs[0], inputs[1]); // UINT8 path: quantized kernel
+ break;
+ default:
+ throw std::runtime_error("NYI"); // every other element type is rejected
+ }
setOutputTensors(op, std::move(outputs));
}
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#ifndef _NNC_CORE_BACKEND_INTERPRETER_ADD_
#define _NNC_CORE_BACKEND_INTERPRETER_ADD_
+#include "QuantizationHelpers.h"
#include "mir/ops/AddOp.h"
#include "mir/Tensor.h"
#include "mir/ShapeRange.h"
+#include <cmath>
+
namespace nnc
{
return {res};
}
+/**
+ * @brief Element-wise addition of two quantized UINT8 tensors using integer arithmetic.
+ *
+ * Both operands are expanded to the output shape of @p op, shifted by their zero points,
+ * rescaled to a common scale (twice the larger input scale), summed, rescaled to the output
+ * scale, offset by the output zero point and clamped to the uint8_t range.
+ *
+ * The function is `inline` because it is defined in a header; without it every translation
+ * unit including this header would emit its own definition and linking would fail.
+ *
+ * @param op  Add operation; supplies the output shape and the output quantization.
+ * @param lhs Left operand, quantized UINT8.
+ * @param rhs Right operand, quantized UINT8.
+ * @return A vector holding the single UINT8 result tensor.
+ */
+inline std::vector<mir::TensorVariant> QuantizedAdd(const mir::ops::AddOp &op,
+                                                    const mir::TensorVariant &lhs,
+                                                    const mir::TensorVariant &rhs)
+{
+  const auto &lhs_type = lhs.getType();
+  const auto &rhs_type = rhs.getType();
+  const auto &output_type = op.getOutput(0)->getType();
+
+  assert(lhs_type.isQuantized());
+  assert(rhs_type.isQuantized());
+  assert(output_type.isQuantized());
+  // The accessors below read the operands as uint8_t, so insist on that element type,
+  // matching the checks done by the other quantized kernels.
+  assert(lhs_type.getElementType() == mir::DataType::UINT8);
+  assert(rhs_type.getElementType() == mir::DataType::UINT8);
+
+  // Input offsets are negated zero points (added to raw values); the output offset is added
+  // back after rescaling.
+  const int32_t lhs_offset = -lhs_type.getQuantization().getZeroPoint();
+  const int32_t rhs_offset = -rhs_type.getQuantization().getZeroPoint();
+  const int32_t output_offset = output_type.getQuantization().getZeroPoint();
+
+  const double lhs_scale = lhs_type.getQuantization().getScale();
+  const double rhs_scale = rhs_type.getQuantization().getScale();
+  const double output_scale = output_type.getQuantization().getScale();
+
+  // Inputs are pre-multiplied by 2^left_shift before the fixed-point multiplies; presumably
+  // this keeps precision through the intermediate rescaling (the factor is divided out again
+  // in the output multiplier).
+  const int left_shift = 20;
+  const int32_t left_shift_factor = 1 << left_shift;
+  const double twice_max_input_scale = 2 * std::max(lhs_scale, rhs_scale);
+  const double real_lhs_multiplier = lhs_scale / twice_max_input_scale;
+  const double real_rhs_multiplier = rhs_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / (left_shift_factor * output_scale);
+
+  int32_t lhs_multiplier = 0;
+  int32_t rhs_multiplier = 0;
+  int32_t output_multiplier = 0;
+  int lhs_shift = 0;
+  int rhs_shift = 0;
+  int output_shift = 0;
+
+  QuantizeMultiplierSmallerThanOneExp(real_lhs_multiplier, &lhs_multiplier, &lhs_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_rhs_multiplier, &rhs_multiplier, &rhs_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  const auto &output_shape = op.getOutputShape(0);
+  mir::TensorVariant broadcasted_lhs(lhs, output_shape);
+  mir::TensorVariant broadcasted_rhs(rhs, output_shape);
+  mir::TensorType res_type(mir::DataType::UINT8, output_shape, output_type.getQuantization());
+  mir::TensorVariant res(res_type);
+
+  mir::Tensor<uint8_t> lhs_accessor(broadcasted_lhs);
+  mir::Tensor<uint8_t> rhs_accessor(broadcasted_rhs);
+  mir::Tensor<uint8_t> res_accessor(res);
+
+  const int32_t output_min = std::numeric_limits<uint8_t>::min();
+  const int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+  for (const auto &index : mir::ShapeRange(output_shape))
+  {
+    const int32_t lhs_val = lhs_accessor.at(index) + lhs_offset;
+    const int32_t rhs_val = rhs_accessor.at(index) + rhs_offset;
+    const int32_t shifted_lhs_val = lhs_val * left_shift_factor;
+    const int32_t shifted_rhs_val = rhs_val * left_shift_factor;
+    const int32_t scaled_lhs_val =
+      MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
+    const int32_t scaled_rhs_val =
+      MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
+    const int32_t raw_sum = scaled_lhs_val + scaled_rhs_val;
+    const int32_t raw_output =
+      MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
+      output_offset;
+    // Saturate to the representable uint8_t range before narrowing.
+    const int32_t clamped_output = std::min(output_max, std::max(output_min, raw_output));
+    res_accessor.at(index) = static_cast<uint8_t>(clamped_output);
+  }
+
+  return {res};
+}
+
} // namespace nnc
#endif //_NNC_CORE_BACKEND_INTERPRETER_ADD_
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
return {res};
}
+/**
+ * @brief Average pooling over a quantized UINT8 tensor.
+ *
+ * For every output element the raw uint8_t values inside the pooling window are summed and
+ * divided (rounding to nearest) by the number of counted elements. Positions that fall outside
+ * the input are counted only when the operation's include-pad flag is set.
+ *
+ * NOTE(review): no rescaling is applied, so input and output are presumably expected to share
+ * quantization parameters - confirm. Padded positions counted by include-pad contribute a raw 0
+ * rather than the input zero point - confirm that this is intended.
+ *
+ * @param op    Pooling operation; supplies shapes, window, strides, padding, output quantization.
+ * @param input Rank-4 quantized UINT8 tensor, indexed as NHWC.
+ * @return A vector holding the single UINT8 result tensor.
+ */
+std::vector<mir::TensorVariant> QuantizedAvgPool2D(const mir::ops::AvgPool2DOp &op,
+                                                   const mir::TensorVariant &input)
+{
+  const auto &input_type = input.getType();
+  const auto &output_type = op.getOutput(0)->getType();
+
+  assert(input_type.isQuantized());
+  assert(output_type.isQuantized());
+  assert(input_type.getElementType() == DataType::UINT8);
+
+  const auto &input_shape = op.getInputShape(0);
+  const auto &output_shape = op.getOutputShape(0);
+  const auto &window_size = op.getWindowSize();
+  const auto &strides = op.getStrides();
+  const auto &padding_before = op.getPaddingBefore();
+  const auto &padding_after = op.getPaddingAfter();
+
+  constexpr int num_spatial_dims = 2;
+  assert(input.getShape().rank() == 4);
+  assert(window_size.size() == num_spatial_dims);
+  assert(strides.size() == num_spatial_dims);
+  assert(padding_before.size() == num_spatial_dims);
+  assert(padding_after.size() == num_spatial_dims);
+
+  Tensor<uint8_t> input_accessor(input);
+
+  TensorType res_type(mir::DataType::UINT8, output_shape, output_type.getQuantization());
+  TensorVariant res(res_type);
+  Tensor<uint8_t> res_accessor(res);
+
+  ShapeRange in_range(input_shape);
+  Index in_index(input_shape.rank());
+
+  const int32_t output_min = std::numeric_limits<uint8_t>::min();
+  const int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+  // Loop invariants hoisted out of the per-element loop.
+  const Shape window_shape(window_size);
+  const bool include_pad = op.getIncludePad();
+  const int32_t output_zero_point = output_type.getQuantization().getZeroPoint();
+
+  for (const auto &out_index : ShapeRange(output_shape))
+  {
+    int32_t result = 0;
+    // Kept as int32_t so the rounding division below is not silently done in unsigned arithmetic.
+    int32_t num_elements = 0;
+
+    // Assuming NHWC format.
+    in_index.at(0) = out_index.at(0);
+    in_index.at(3) = out_index.at(3);
+
+    for (const auto &window_index : ShapeRange(window_shape))
+    {
+      // Assuming NHWC format.
+      for (int i = 0; i < num_spatial_dims; ++i)
+        in_index.at(1 + i) =
+          out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+
+      if (in_range.contains(in_index))
+      {
+        num_elements++;
+        result += input_accessor.at(in_index);
+      }
+      else if (include_pad)
+      {
+        num_elements++;
+      }
+    }
+
+    if (num_elements > 0)
+    {
+      // Integer division rounded to nearest.
+      result = (result + num_elements / 2) / num_elements;
+    }
+    else
+    {
+      // The whole window lies in padding and padding is not counted: dividing would be an
+      // integer division by zero. Emit the encoding of real 0.0 instead.
+      result = output_zero_point;
+    }
+    result = std::max(result, output_min);
+    result = std::min(result, output_max);
+    res_accessor.at(out_index) = static_cast<uint8_t>(result);
+  }
+
+  return {res};
+}
+
} // namespace nnc
const mir::Tensor<float> _input;
};
+std::vector<mir::TensorVariant> QuantizedAvgPool2D(const mir::ops::AvgPool2DOp &op,
+ const mir::TensorVariant &input); // average pooling for quantized UINT8 input; returns one UINT8 tensor
+
} // namespace nnc
#endif //_NNC_CORE_BACKEND_INTERPRETER_AVG_POOL_2D_
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include "Conv2D.h"
+#include "QuantizationHelpers.h"
#include "mir/ShapeRange.h"
#include "mir/TensorUtil.h"
+#include <cmath>
+
namespace nnc
{
{
}
+/**
+ * @brief 2D convolution over quantized UINT8 input and kernel with an INT32 bias.
+ *
+ * Each output element accumulates (kernel - kernel zero point) * (input - input zero point)
+ * over the filter window in 32-bit integers, adds the bias, rescales the sum by
+ * input_scale * kernel_scale / output_scale, adds the output zero point and saturates to uint8_t.
+ * Input is indexed as {batch, y, x, channel}; the kernel as {out_channel, y, x, in_channel}.
+ *
+ * @param op     Convolution operation; supplies strides, leading padding, output shape and
+ *               output quantization.
+ * @param input  Rank-4 quantized UINT8 tensor.
+ * @param kernel Rank-4 quantized UINT8 tensor.
+ * @param bias   Quantized INT32 tensor indexed by output channel.
+ * @return A vector holding the single UINT8 result tensor.
+ */
+std::vector<mir::TensorVariant> QuantizedConv2D(const mir::ops::Conv2DOp &op,
+                                                const mir::TensorVariant &input,
+                                                const mir::TensorVariant &kernel,
+                                                const mir::TensorVariant &bias)
+{
+  const auto &in_type = input.getType();
+  const auto &filter_type = kernel.getType();
+  const auto &b_type = bias.getType();
+  const auto &out_type = op.getOutput(0)->getType();
+
+  assert(in_type.isQuantized());
+  assert(filter_type.isQuantized());
+  assert(b_type.isQuantized());
+  assert(out_type.isQuantized());
+  assert(in_type.getElementType() == DataType::UINT8);
+  assert(filter_type.getElementType() == DataType::UINT8);
+  assert(b_type.getElementType() == DataType::INT32);
+
+  // Offsets that are added to raw values: negated zero points for the operands,
+  // the plain zero point for the result.
+  const int32_t in_offset = -in_type.getQuantization().getZeroPoint();
+  const int32_t filter_offset = -filter_type.getQuantization().getZeroPoint();
+  const int32_t out_offset = out_type.getQuantization().getZeroPoint();
+
+  const double in_scale = in_type.getQuantization().getScale();
+  const double filter_scale = filter_type.getQuantization().getScale();
+  const double out_scale = out_type.getQuantization().getScale();
+
+  // Fixed-point form of the accumulator-to-output rescaling factor.
+  const double rescale = in_scale * filter_scale / out_scale;
+  int32_t rescale_multiplier = 0;
+  int rescale_shift = 0;
+  QuantizeMultiplier(rescale, &rescale_multiplier, &rescale_shift);
+
+  const Shape &in_dims = input.getShape();
+  const Shape &filter_dims = kernel.getShape();
+  const Shape &out_dims = op.getOutputShape(0);
+  const auto &strides = op.getStrides();
+  const std::vector<int32_t> &pads = op.getPaddingBefore();
+
+  assert(in_dims.rank() == 4);
+  assert(filter_dims.rank() == 4);
+  assert(filter_dims.dim(3) == in_dims.dim(3));
+  assert(filter_dims.dim(0) == out_dims.dim(3));
+  assert(strides.size() == 2);
+  assert(pads.size() == 2);
+
+  const int32_t stride_h = strides[0];
+  const int32_t stride_w = strides[1];
+  const int32_t pad_h = pads[0];
+  const int32_t pad_w = pads[1];
+  const int32_t in_h = in_dims.dim(1);
+  const int32_t in_w = in_dims.dim(2);
+
+  Tensor<uint8_t> in_data(input);
+  Tensor<uint8_t> filter_data(kernel);
+  Tensor<int32_t> bias_data(bias);
+
+  TensorType result_type(mir::DataType::UINT8, out_dims, out_type.getQuantization());
+  TensorVariant result(result_type);
+  Tensor<uint8_t> result_data(result);
+
+  const int32_t lowest = std::numeric_limits<uint8_t>::min();
+  const int32_t highest = std::numeric_limits<uint8_t>::max();
+
+  for (int n = 0; n < out_dims.dim(0); ++n)
+  {
+    for (int oy = 0; oy < out_dims.dim(1); ++oy)
+    {
+      const int y_begin = (oy * stride_h) - pad_h;
+      for (int ox = 0; ox < out_dims.dim(2); ++ox)
+      {
+        const int x_begin = (ox * stride_w) - pad_w;
+        for (int oc = 0; oc < out_dims.dim(3); ++oc)
+        {
+          int32_t sum = 0;
+          for (int ky = 0; ky < filter_dims.dim(1); ++ky)
+          {
+            const int iy = y_begin + ky;
+            // Rows outside the input contribute nothing to the sum.
+            if (iy < 0 || iy >= in_h)
+              continue;
+            for (int kx = 0; kx < filter_dims.dim(2); ++kx)
+            {
+              const int ix = x_begin + kx;
+              // Columns outside the input contribute nothing to the sum.
+              if (ix < 0 || ix >= in_w)
+                continue;
+              for (int c = 0; c < filter_dims.dim(3); ++c)
+              {
+                const int32_t in_val = in_data.at(Index{n, iy, ix, c});
+                const int32_t filter_val = filter_data.at(Index{oc, ky, kx, c});
+                sum += (filter_val + filter_offset) * (in_val + in_offset);
+              }
+            }
+          }
+          sum += bias_data.at(Index{oc});
+          sum = MultiplyByQuantizedMultiplier(sum, rescale_multiplier, rescale_shift);
+          sum += out_offset;
+          // Saturate to the uint8_t range before narrowing.
+          sum = std::min(std::max(sum, lowest), highest);
+          result_data.at(Index{n, oy, ox, oc}) = static_cast<uint8_t>(sum);
+        }
+      }
+    }
+  }
+
+  return {result};
+}
+
} // namespace nnc
const mir::ops::Conv2DOp &_op;
};
+std::vector<mir::TensorVariant> QuantizedConv2D(const mir::ops::Conv2DOp &op,
+ const mir::TensorVariant &input,
+ const mir::TensorVariant &kernel,
+ const mir::TensorVariant &bias); // quantized conv: UINT8 input/kernel, INT32 bias; returns one UINT8 tensor
+
} // namespace nnc
#endif //_NNC_CORE_BACKEND_INTERPRETER_CONV2D_IMPL
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include "DepthwiseConv2D.h"
+#include "QuantizationHelpers.h"
#include "mir/ShapeRange.h"
+#include <cmath>
+
namespace nnc
{
{
}
+/**
+ * @brief Depthwise 2D convolution over quantized UINT8 input and kernel with an INT32 bias.
+ *
+ * The kernel is indexed as HWIO, where I is the input channel and O the depth multiplier.
+ * Input channel `ic` with multiplier index `m` produces output channel
+ * `ic * depth_multiplier + m`, so every output channel admitted by the shape check
+ * (in_channels * depth_multiplier == out_channels) is computed. Sums are accumulated in 32-bit
+ * integers, biased, rescaled by input_scale * kernel_scale / output_scale, offset by the
+ * output zero point and saturated to uint8_t.
+ *
+ * @param op     Depthwise convolution; supplies strides, leading padding, output shape and
+ *               output quantization.
+ * @param input  Rank-4 quantized UINT8 tensor indexed as {batch, y, x, channel}.
+ * @param kernel Rank-4 quantized UINT8 tensor in HWIO layout.
+ * @param bias   Quantized INT32 tensor indexed by output channel.
+ * @return A vector holding the single UINT8 result tensor.
+ */
+std::vector<mir::TensorVariant> QuantizedDepthwiseConv2D(const ops::DepthwiseConv2DOp &op,
+                                                         const TensorVariant &input,
+                                                         const TensorVariant &kernel,
+                                                         const TensorVariant &bias)
+{
+  const auto &input_type = input.getType();
+  const auto &kernel_type = kernel.getType();
+  const auto &bias_type = bias.getType();
+  const auto &output_type = op.getOutput(0)->getType();
+
+  assert(input_type.isQuantized());
+  assert(kernel_type.isQuantized());
+  assert(bias_type.isQuantized());
+  assert(output_type.isQuantized());
+  assert(input_type.getElementType() == DataType::UINT8);
+  assert(kernel_type.getElementType() == DataType::UINT8);
+  assert(bias_type.getElementType() == DataType::INT32);
+
+  const int32_t input_offset = -input_type.getQuantization().getZeroPoint();
+  const int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
+  const int32_t output_offset = output_type.getQuantization().getZeroPoint();
+
+  const double input_scale = input_type.getQuantization().getScale();
+  const double kernel_scale = kernel_type.getQuantization().getScale();
+  const double output_scale = output_type.getQuantization().getScale();
+
+  // Fixed-point form of the accumulator-to-output rescaling factor.
+  const double real_multiplier = input_scale * kernel_scale / output_scale;
+  int32_t output_multiplier = 0;
+  int output_shift = 0;
+  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  const Shape &in_shape = input.getShape();
+  const Shape &kernel_shape = kernel.getShape();
+  const Shape &out_shape = op.getOutputShape(0);
+  const auto &strides = op.getStrides();
+  const std::vector<int32_t> &pads = op.getPaddingBefore();
+
+  assert(in_shape.rank() == 4);
+  assert(kernel_shape.rank() == 4);
+  assert(kernel_shape.dim(2) == in_shape.dim(3)); // HWIO
+  assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
+  assert(strides.size() == 2);
+  assert(pads.size() == 2);
+
+  const int32_t stride_height = strides[0];
+  const int32_t stride_width = strides[1];
+
+  const int32_t pad_height = pads[0];
+  const int32_t pad_width = pads[1];
+
+  const int32_t input_height = in_shape.dim(1);
+  const int32_t input_width = in_shape.dim(2);
+
+  Tensor<uint8_t> input_accessor(input);
+  Tensor<uint8_t> kernel_accessor(kernel);
+  Tensor<int32_t> bias_accessor(bias);
+
+  TensorType res_type(mir::DataType::UINT8, out_shape, output_type.getQuantization());
+  TensorVariant res(res_type);
+  Tensor<uint8_t> res_accessor(res);
+
+  const int32_t output_min = std::numeric_limits<uint8_t>::min();
+  const int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+  const int batches = out_shape.dim(0);
+  const int output_height = out_shape.dim(1);
+  const int output_width = out_shape.dim(2);
+  const int input_depth = in_shape.dim(3);
+
+  const int filter_height = kernel_shape.dim(0);    // HWIO
+  const int filter_width = kernel_shape.dim(1);     // HWIO
+  const int depth_multiplier = kernel_shape.dim(3); // HWIO: O is the per-channel multiplier
+
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        const int in_x_origin = (out_x * stride_width) - pad_width;
+        const int in_y_origin = (out_y * stride_height) - pad_height;
+        for (int ic = 0; ic < input_depth; ++ic)
+        {
+          // Previously only multiplier index 0 was evaluated (oc == ic), which left the
+          // remaining output channels unwritten whenever depth_multiplier > 1.
+          for (int m = 0; m < depth_multiplier; ++m)
+          {
+            const int oc = ic * depth_multiplier + m;
+            int32_t acc = 0;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+            {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+              {
+                const int in_x = in_x_origin + filter_x;
+                const int in_y = in_y_origin + filter_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+                {
+                  Index in_index{b, in_y, in_x, ic};
+                  Index ker_index{filter_y, filter_x, ic, m}; // HWIO
+                  const int32_t input_val = input_accessor.at(in_index);
+                  const int32_t kernel_val = kernel_accessor.at(ker_index);
+                  acc += (kernel_val + kernel_offset) * (input_val + input_offset);
+                }
+              }
+            }
+            acc += bias_accessor.at(Index{oc});
+            acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+            acc += output_offset;
+            // Saturate to the uint8_t range before narrowing.
+            acc = std::max(acc, output_min);
+            acc = std::min(acc, output_max);
+            Index out_index{b, out_y, out_x, oc};
+            res_accessor.at(out_index) = static_cast<uint8_t>(acc);
+          }
+        }
+      }
+    }
+  }
+
+  return {res};
+}
+
} // namespace nnc
const mir::ops::DepthwiseConv2DOp &_op;
};
+std::vector<mir::TensorVariant> QuantizedDepthwiseConv2D(const mir::ops::DepthwiseConv2DOp &op,
+ const mir::TensorVariant &input,
+ const mir::TensorVariant &kernel,
+ const mir::TensorVariant &bias); // quantized depthwise conv: UINT8 input/kernel, INT32 bias; returns one UINT8 tensor
+
} // namespace nnc
#endif //_NNC_CORE_BACKEND_INTERPRETER_DEPTHWISE_CONV2D_IMPL_
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizationHelpers.h"
+
+#include <cmath>
+#include <limits>
+
+namespace nnc
+{
+
+/**
+ * @brief Converts a real multiplier into a 32-bit fixed-point significand and a power-of-two
+ *        exponent, such that double_multiplier ~= quantized_multiplier * 2^(shift - 31).
+ *
+ * @param double_multiplier    Real-valued multiplier to convert.
+ * @param quantized_multiplier Receives the significand; its magnitude is in [2^30, 2^31) unless
+ *                             the value was flushed to zero or saturated.
+ * @param shift                Receives the exponent, limited to [-31, 30]. Multipliers too small
+ *                             for that range become (0, 0); multipliers too large saturate to
+ *                             the largest representable magnitude with shift 30.
+ */
+void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+  if (double_multiplier == 0.)
+  {
+    *quantized_multiplier = 0;
+    *shift = 0;
+    return;
+  }
+
+  const double q = std::frexp(double_multiplier, shift);
+  // std::round is used explicitly: the unqualified name is not guaranteed to be declared by <cmath>.
+  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+
+  assert(q_fixed <= (1ll << 31));
+  // Rounding can push the significand up to exactly 2^31; renormalize into the int32_t range.
+  if (q_fixed == (1ll << 31))
+  {
+    q_fixed /= 2;
+    ++*shift;
+  }
+  assert(q_fixed <= std::numeric_limits<int32_t>::max());
+  // A shift amount smaller than -31 would cause all bits to be shifted out
+  // and thus all results would be zero. We implement that instead with
+  // q_fixed==0, so as to avoid hitting issues with right-shift
+  // operations with shift amounts greater than 31. Note that this happens
+  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+  // that we're effectively flushing tiny double_multiplier's to zero.
+  // We could conceivably handle values in the range (roughly) [32, 63]
+  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+  // the present handling is just doing 'flush denormals to zero'. We could
+  // reconsider and actually generate nonzero denormals if a need arises.
+  if (*shift < -31)
+  {
+    *shift = 0;
+    q_fixed = 0;
+  }
+  // A shift above 30 cannot be applied as `1 << shift` on a 32-bit int without undefined
+  // behaviour, so saturate to the largest representable magnitude, keeping the sign.
+  if (*shift > 30)
+  {
+    const int64_t max_magnitude = (1ll << 31) - 1;
+    *shift = 30;
+    q_fixed = (q_fixed < 0) ? -max_magnitude : max_magnitude;
+  }
+  *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+// Variant of QuantizeMultiplier for multipliers strictly between 0 and 1.
+// The exponent produced for such a value is asserted to be non-positive and is
+// returned through `left_shift` unchanged.
+void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+                                         int *left_shift)
+{
+  assert(double_multiplier < 1.0);
+  assert(double_multiplier > 0.0);
+
+  int exponent;
+  QuantizeMultiplier(double_multiplier, quantized_multiplier, &exponent);
+
+  assert(exponent <= 0);
+  *left_shift = exponent;
+}
+
+// Returns a word with every bit set when `a` is non-zero, and 0 otherwise.
+int32_t MaskIfNonZero(int32_t a)
+{
+  if (a != 0)
+    return ~static_cast<int32_t>(0);
+  return 0;
+}
+
+// All-ones mask when `a` is zero, 0 otherwise.
+int32_t MaskIfZero(int32_t a) { return MaskIfNonZero(a == 0 ? 1 : 0); }
+
+// All-ones mask when a < b, 0 otherwise.
+int32_t MaskIfLessThan(int32_t a, int32_t b) { return MaskIfNonZero(a < b ? 1 : 0); }
+
+// All-ones mask when a > b, 0 otherwise.
+int32_t MaskIfGreaterThan(int32_t a, int32_t b) { return MaskIfNonZero(a > b ? 1 : 0); }
+
+// Divides x by 2^exponent, rounding to nearest; exact halves round away from zero.
+// `exponent` must lie in [0, 31].
+inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
+{
+  assert(exponent >= 0);
+  assert(exponent <= 31);
+
+  // Mask selecting the low `exponent` bits, i.e. the part discarded by the shift.
+  const int32_t low_bits_mask = (1ll << exponent) - 1;
+  const int32_t discarded = x & low_bits_mask;
+
+  // Largest discarded value that still rounds down; one higher for negative x.
+  const int32_t negative_bias = MaskIfLessThan(x, 0) & 1;
+  const int32_t round_down_limit = (low_bits_mask >> 1) + negative_bias;
+
+  const int32_t shifted = x >> exponent;
+  const int32_t round_up = MaskIfGreaterThan(discarded, round_down_limit) & 1;
+  return shifted + round_up;
+}
+
+// Returns the high 32 bits of 2*a*b, rounded to nearest, i.e. (a * b) / 2^31.
+// The single overflowing case, INT32_MIN * INT32_MIN, saturates to INT32_MAX.
+inline std::int32_t SaturatingRoundingDoublingHighMul(std::int32_t a, std::int32_t b)
+{
+  using Limits = std::numeric_limits<std::int32_t>;
+
+  if (a == Limits::min() && b == Limits::min())
+    return Limits::max();
+
+  const std::int64_t product = static_cast<std::int64_t>(a) * static_cast<std::int64_t>(b);
+  // Half of the divisor, signed like the product, so the truncating division rounds to nearest.
+  const std::int32_t rounding = (product < 0) ? (1 - (1 << 30)) : (1 << 30);
+  return static_cast<std::int32_t>((product + rounding) / (1ll << 31));
+}
+
+// Scales x by a fixed-point multiplier with a power-of-two exponent `shift`.
+// A positive shift is applied to x as a left shift before the multiplication;
+// a non-positive shift becomes a rounding right shift of the product.
+int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
+{
+  int pre_shift = 0;
+  int post_shift = 0;
+  if (shift > 0)
+    pre_shift = shift;
+  else
+    post_shift = -shift;
+
+  const int32_t product =
+    SaturatingRoundingDoublingHighMul(x * (1 << pre_shift), quantized_multiplier);
+  return RoundingDivideByPOT(product, post_shift);
+}
+
+// Scales x by a fixed-point multiplier whose exponent `left_shift` is non-positive:
+// the product is divided, with rounding, by 2^(-left_shift).
+int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x, int32_t quantized_multiplier,
+                                                       int left_shift)
+{
+  const int32_t product = SaturatingRoundingDoublingHighMul(x, quantized_multiplier);
+  const int right_shift = -left_shift;
+  return RoundingDivideByPOT(product, right_shift);
+}
+
+} // namespace nnc
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_HELPERS_
+#define _NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_HELPERS_
+
+#include "mir/TensorType.h"
+
+namespace nnc
+{
+// Splits double_multiplier into a 32-bit fixed-point significand and a power-of-two exponent (*shift).
+void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+// Scales x by such a multiplier: left shift before the multiply if shift > 0, else rounding right shift after.
+int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift);
+// Same as QuantizeMultiplier, but asserts 0 < double_multiplier < 1; the resulting *left_shift is <= 0.
+void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+// Scales x by the multiplier, then divides with rounding by 2^(-left_shift); left_shift must be <= 0.
+int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x, int32_t quantized_multiplier,
+ int left_shift);
+
+} // namespace nnc
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_HELPERS_