src/armnn/TypesUtils.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // SPDX-License-Identifier: MIT
   4 //
   5 #include <armnn/TypesUtils.hpp>
   6 #include <armnn/utility/Assert.hpp>
   7 #include <armnn/utility/NumericCast.hpp>
   8
   namespace
   {
   /// NaN test that is safe to call with integral arguments.
   ///
   /// MSVC does not implement std::isnan() correctly for integral types, so the
   /// integral case is routed through an explicit conversion to double.
   /// See https://stackoverflow.com/a/56356405
   /// @{

   /// Integral overload: the argument is converted to double before the check,
   /// which is how the spec defines std::isnan() for integral types.
   template <typename T>
   inline typename std::enable_if<std::is_integral<T>::value, int>::type IsNan(T x)
   {
       const double asDouble = static_cast<double>(x);
       return std::isnan(asDouble);
   }

   /// Non-integral overload: forwards the argument to std::isnan() unchanged.
   template <typename T>
   inline typename std::enable_if<!std::is_integral<T>::value, int>::type IsNan(T x)
   {
       return std::isnan(x);
   }
   /// @}
   }    // anonymous namespace
  28
  29 template<typename QuantizedType>
  30 QuantizedType armnn::Quantize(float value, float scale, int32_t offset)
  31 {
  32     static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
  33     constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
  34     constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
  35     ARMNN_ASSERT(scale != 0.f);
  36     ARMNN_ASSERT(!std::isnan(value));
  37
  38     float clampedValue = std::min(std::max(static_cast<float>(round(value/scale) + offset), static_cast<float>(min)),
  39                                   static_cast<float>(max));
  40     auto quantizedBits = static_cast<QuantizedType>(clampedValue);
  41
  42     return quantizedBits;
  43 }
  44
  45 template <typename QuantizedType>
  46 float armnn::Dequantize(QuantizedType value, float scale, int32_t offset)
  47 {
  48     static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
  49     ARMNN_ASSERT(scale != 0.f);
  50     ARMNN_ASSERT(!IsNan(value));
  51     return (armnn::numeric_cast<float>(value - offset)) * scale;
  52 }
  53
  54 /// Explicit specialization of Quantize for int8_t
  55 template
  56 int8_t armnn::Quantize<int8_t>(float value, float scale, int32_t offset);
  57
  58 /// Explicit specialization of Quantize for uint8_t
  59 template
  60 uint8_t armnn::Quantize<uint8_t>(float value, float scale, int32_t offset);
  61
  62 /// Explicit specialization of Quantize for int16_t
  63 template
  64 int16_t armnn::Quantize<int16_t>(float value, float scale, int32_t offset);
  65
  66 /// Explicit specialization of Quantize for int32_t
  67 template
  68 int32_t armnn::Quantize<int32_t>(float value, float scale, int32_t offset);
  69
  70 /// Explicit specialization of Dequantize for int8_t
  71 template
  72 float armnn::Dequantize<int8_t>(int8_t value, float scale, int32_t offset);
  73
  74 /// Explicit specialization of Dequantize for uint8_t
  75 template
  76 float armnn::Dequantize<uint8_t>(uint8_t value, float scale, int32_t offset);
  77
  78 /// Explicit specialization of Dequantize for int16_t
  79 template
  80 float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset);
  81
  82 /// Explicit specialization of Dequantize for int32_t
  83 template
  84 float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);