14 #include <initializer_list> 18 #include <boost/numeric/conversion/cast.hpp> 23 template<
typename T,
bool DoQuantize=true>
26 static T
Quantize(
float value,
float scale, int32_t offset)
28 return armnn::Quantize<T>(value, scale, offset);
// Dequantize: static declaration taking a T plus a scale and an offset, returning float.
// NOTE(review): no body follows this signature in this listing; presumably it is the
// counterpart of the Quantize above -- confirm against the full header.
31 static float Dequantize(T value,
float scale, int32_t offset)
// Second static Quantize with the same signature as the earlier one (float value,
// float scale, int32_t offset) -> T.
// NOTE(review): neither the enclosing scope nor the body is visible here; presumably
// this belongs to a different specialization -- TODO confirm.
40 static T
Quantize(
float value,
float scale, int32_t offset)
// Second static Dequantize with the same signature as the earlier one (T value,
// float scale, int32_t offset) -> float.
// NOTE(review): neither the enclosing scope nor the body is visible here -- TODO confirm
// what it returns.
46 static float Dequantize(T value,
float scale, int32_t offset)
/// Type trait: tells whether an iterator type yields floating-point elements.
///
/// @tparam ItType Iterator type to inspect; std::iterator_traits<ItType> must
///                provide a value_type.
///
/// `value` is true exactly when std::iterator_traits<ItType>::value_type
/// satisfies std::is_floating_point.
template<typename ItType>
struct IsFloatingPointIterator
{
    static constexpr bool value =
        std::is_floating_point<typename std::iterator_traits<ItType>::value_type>::value;
};
103 template <
typename T,
typename FloatIt,
104 typename std::enable_if<IsFloatingPointIterator<FloatIt>::value,
int>::type=0
106 std::vector<T>
QuantizedVector(FloatIt first, FloatIt last,
float qScale, int32_t qOffset)
108 std::vector<T> quantized;
109 quantized.reserve(boost::numeric_cast<size_t>(std::distance(first, last)));
111 for (
auto it = first; it != last; ++it)
114 T q = SelectiveQuantize<T>(f, qScale, qOffset);
115 quantized.push_back(q);
/// Convenience overload: quantizes all elements of a std::vector<float>.
///
/// @tparam T      Element type of the returned vector.
/// @param array   Input values.
/// @param qScale  Quantization scale; defaults to 1.f.
/// @param qOffset Quantization offset; defaults to 0.
/// @return Result of the iterator-range QuantizedVector<T> over the whole of array.
template<typename T>
std::vector<T> QuantizedVector(const std::vector<float>& array, float qScale = 1.f, int32_t qOffset = 0)
{
    return QuantizedVector<T>(array.begin(), array.end(), qScale, qOffset);
}
/// Convenience overload: quantizes a braced list of floats, e.g.
/// QuantizedVector<T>({ 1.f, 2.f }, scale, offset).
///
/// @tparam T      Element type of the returned vector.
/// @param array   Input values.
/// @param qScale  Quantization scale; defaults to 1.f.
/// @param qOffset Quantization offset; defaults to 0.
/// @return Result of the iterator-range QuantizedVector<T> over the whole of array.
template<typename T>
std::vector<T> QuantizedVector(std::initializer_list<float> array, float qScale = 1.f, int32_t qOffset = 0)
{
    return QuantizedVector<T>(array.begin(), array.end(), qScale, qOffset);
}
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize an 8-bit data type into a floating point data type.
T SelectiveQuantize(float value, float scale, int32_t offset)
static T Quantize(float value, float scale, int32_t offset)
Copyright (c) 2020 ARM Limited.
void IgnoreUnused(Ts &&...)
static float Dequantize(armnn::Half value, float scale, int32_t offset)
std::vector< T > QuantizedVector(FloatIt first, FloatIt last, float qScale, int32_t qOffset)
static armnn::BFloat16 Quantize(float value, float scale, int32_t offset)
static float Dequantize(T value, float scale, int32_t offset)
static float Dequantize(armnn::BFloat16 value, float scale, int32_t offset)
static T Quantize(float value, float scale, int32_t offset)
static float Dequantize(T value, float scale, int32_t offset)
float SelectiveDequantize(T value, float scale, int32_t offset)
static armnn::Half Quantize(float value, float scale, int32_t offset)