Imported Upstream version 1.8.0

[platform/core/ml/nnfw.git] / compute / cker / include / cker / Utils.h
diff --git a/compute/cker/include/cker/Utils.h b/compute/cker/include/cker/Utils.h

index b69d55c..2abb998 100644 (file)
--- a/compute/cker/include/cker/Utils.h
+++ b/compute/cker/include/cker/Utils.h
@@ -123,6 +123,68 @@ inline int CountLeadingZeros(uint32_t integer_input)
    return leading_zeros;
  }
  
+inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
+                                             int32_t *output_inv_sqrt, int *output_shift)
+{
+  assert(input >= 0);
+  if (input <= 1)
+  {
+    // Handle the input value 1 separately to avoid overflow in that case
+    // in the general computation below (b/143972021). Also handle 0 as if it
+    // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
+    // but rare/unrealistic input value. We can expect both to occur in some
+    // incompletely trained models, but probably not in fully trained models.
+    *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
+    *output_shift = 0;
+    return;
+  }
+  assert(input > 1);
+  *output_shift = 11;
+  while (input >= (1 << 29))
+  {
+    input /= 4;
+    ++*output_shift;
+  }
+  const unsigned max_left_shift_bits = CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
+  const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
+  const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
+  *output_shift -= left_shift_bit_pairs;
+  input <<= 2 * left_shift_bit_pairs;
+  assert(input >= (1 << 27));
+  assert(input < (1 << 29));
+  using gemmlowp::FixedPoint;
+  using gemmlowp::Rescale;
+  using gemmlowp::SaturatingRoundingMultiplyByPOT;
+  // Using 3 integer bits gives us enough room for the internal arithmetic in
+  // this Newton-Raphson iteration.
+  using F3 = FixedPoint<int32_t, 3>;
+  using F0 = FixedPoint<int32_t, 0>;
+  const F3 fixedpoint_input = F3::FromRaw(input >> 1);
+  const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
+  const F3 fixedpoint_half_three =
+      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
+  // Newton-Raphson iteration
+  // Naive unoptimized starting guess: x = 1
+  F3 x = F3::One();
+  // Naive unoptimized number of iterations: 5
+  for (int i = 0; i < 5; i++)
+  {
+    const F3 x3 = Rescale<3>(x * x * x);
+    x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
+  }
+  const F0 fixedpoint_half_sqrt_2 =
+      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
+  x = x * fixedpoint_half_sqrt_2;
+  *output_inv_sqrt = x.raw();
+  if (*output_shift < 0)
+  {
+    *output_inv_sqrt <<= -*output_shift;
+    *output_shift = 0;
+  }
+  // Convert right shift (right is positive) to left shift.
+  *output_shift *= reverse_shift;
+}
+
  // Comment from tensorflow lite:
  //
  // DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING