onert-micro/luci-interpreter/src/kernels/TestUtils.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H
  19 #define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
  20
  21 #include "luci_interpreter/core/Tensor.h"
  22 #include "luci_interpreter/MemoryManager.h"
  23
  24 #include <type_traits>
  25
  26 #include <gtest/gtest.h>
  27 #include <gmock/gmock.h>
  28
  29 namespace luci_interpreter
  30 {
  31 namespace kernels
  32 {
  33 namespace testing
  34 {
  35
// Forward declaration of the float -> integer quantization helper.
// The definition appears further down in this header; it is declared here so that the
// quantizing makeInputTensor overloads can call it before that point.
template <typename T>
std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
  38
  39 template <DataType DT>
  40 Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
  41                        IMemoryManager *memory_manager)
  42 {
  43   Tensor tensor(DT, shape, {}, "");
  44   memory_manager->allocate_memory(tensor);
  45   tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
  46   return tensor;
  47 }
  48
  49 /**
  50  * @brief Create layer-wise quantized tensor
  51  * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
  52  * @param shape desired tensor shape
  53  * @param scale scale of quantized number
  54  * @param zero_point zero point of quantized number, should be 0 for signed datatypes
  55  * @param data floating point data for quantization
  56  * @param memory_manager memory manager for allocating memory to tensor
  57  * @return created tensor
  58  */
  59 template <DataType DT>
  60 Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
  61                        const std::vector<float> &data, IMemoryManager *memory_manager)
  62 {
  63   using NativeT = typename DataTypeImpl<DT>::Type;
  64   Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
  65   std::vector<NativeT> quantized_data =
  66     quantize<NativeT>(data.data(), data.size(), scale, zero_point);
  67   memory_manager->allocate_memory(tensor);
  68   tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
  69   return tensor;
  70 }
  71
  72 /**
  73  * @brief Create channel-wise quantized tensor
  74  * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
  75  * @param shape desired tensor shape
  76  * @param scales scales of quantized number
  77  * @param zero_points zero points of quantized number, should be 0 for signed datatypes
  78  * @param quantize_dimension dimension to apply quantization along. Usually channels/output channels
  79  * @param data floating point data for quantization
  80  * @param memory_manager memory manager for allocating memory to tensor
  81  * @return created tensor
  82  */
  83 template <DataType DT>
  84 Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
  85                        const std::vector<int32_t> &zero_points, int quantized_dimension,
  86                        const std::vector<float> &data, IMemoryManager *memory_manager)
  87 {
  88   using NativeT = typename DataTypeImpl<DT>::Type;
  89   assert(quantized_dimension < shape.num_dims());
  90   Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, "");
  91
  92   // quantize_dimension breaks shape into two parts:
  93   // inner dimensions that contains continuous data with one quantization type
  94   // outer dimensions that contains other dimensions
  95   size_t outer_dims_size = 1;
  96   int32_t quant_dim_size = shape.dim(quantized_dimension);
  97   size_t inner_dims_size = 1;
  98   assert(quant_dim_size == scales.size());
  99   assert(quant_dim_size == zero_points.size());
 100
 101   for (int i = 0; i < quantized_dimension; ++i)
 102     outer_dims_size *= shape.dim(i);
 103   for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
 104     inner_dims_size *= shape.dim(i);
 105
 106   assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
 107
 108   std::vector<NativeT> quantized_data;
 109   quantized_data.reserve(shape.num_elements());
 110   for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
 111     for (int32_t channel = 0; channel < quant_dim_size; ++channel)
 112     {
 113       int32_t zero_point = zero_points[channel];
 114       float scale = scales[channel];
 115       size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
 116       std::vector<NativeT> part_quantized_data =
 117         quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
 118       quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
 119                             part_quantized_data.end());
 120     }
 121   assert(quantized_data.size() == shape.num_elements());
 122   memory_manager->allocate_memory(tensor);
 123   tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
 124   return tensor;
 125 }
 126
// Declarations only; the definitions are not part of this header.
// Judging by the name and parameters, these create a tensor of the given element type for use
// as a kernel output, the second overload additionally taking a single scale / zero point
// pair - NOTE(review): confirm against the definitions.
Tensor makeOutputTensor(DataType element_type);
Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point);
 129
// Declaration only; the definition is not part of this header.
// Presumably returns the dimensions of `tensor` as a vector - confirm against the definition.
std::vector<int32_t> extractTensorShape(const Tensor &tensor);
 131
 132 // Returns the corresponding DataType given the type T.
 133 template <typename T> constexpr DataType getElementType()
 134 {
 135   if (std::is_same<T, float>::value)
 136     return DataType::FLOAT32;
 137   if (std::is_same<T, double>::value)
 138     return DataType::FLOAT64;
 139   if (std::is_same<T, uint8_t>::value)
 140     return DataType::U8;
 141   if (std::is_same<T, uint16_t>::value)
 142     return DataType::U16;
 143   if (std::is_same<T, uint32_t>::value)
 144     return DataType::U32;
 145   if (std::is_same<T, uint64_t>::value)
 146     return DataType::U64;
 147   if (std::is_same<T, int8_t>::value)
 148     return DataType::S8;
 149   if (std::is_same<T, int16_t>::value)
 150     return DataType::S16;
 151   if (std::is_same<T, int32_t>::value)
 152     return DataType::S32;
 153   if (std::is_same<T, int64_t>::value)
 154     return DataType::S64;
 155   if (std::is_same<T, bool>::value)
 156     return DataType::BOOL;
 157   return DataType::Unknown;
 158 }
 159
 160 template <typename T> std::vector<T> extractTensorData(const Tensor &tensor)
 161 {
 162   const auto *data_ptr = tensor.data<T>();
 163   return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements());
 164 }
 165
// Declaration only; the definition is not part of this header.
// Presumably converts the quantized contents of `tensor` back to float values using the
// tensor's own quantization parameters - confirm against the definition.
std::vector<float> dequantizeTensorData(const Tensor &tensor);
 167
// Array version of `::testing::FloatNear` matcher.
// `values` are the expected elements; `max_abs_error` is the tolerance and defaults to 1.0e-5f.
// Declaration only; the definition is not part of this header.
::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values,
                                                      float max_abs_error = 1.0e-5f);
 171
 172 template <typename T>
 173 std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point)
 174 {
 175   static_assert(std::is_integral<T>::value, "Integral type expected.");
 176
 177   float q_min{}, q_max{};
 178   if (std::is_signed<T>::value)
 179   {
 180     q_min = -std::numeric_limits<T>::max();
 181     q_max = std::numeric_limits<T>::max();
 182   }
 183   else
 184   {
 185     q_min = 0;
 186     q_max = std::numeric_limits<T>::max();
 187   }
 188
 189   std::vector<T> q;
 190   for (size_t i = 0; i < num_elements; ++i)
 191   {
 192     const auto &f = data[i];
 193     q.push_back(static_cast<T>(
 194       std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
 195   }
 196   return q;
 197 }
 198
 199 template <typename T>
 200 std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point)
 201 {
 202   static_assert(std::is_integral<T>::value, "Integral type expected.");
 203   std::vector<float> f;
 204   for (size_t i = 0; i < num_elements; ++i)
 205   {
 206     const T &q = data[i];
 207     f.push_back(scale * (q - zero_point));
 208   }
 209   return f;
 210 }
 211
 212 // NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned).
 213 template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
 214 {
 215   static_assert(std::is_integral<T>::value, "Integral type expected.");
 216   int32_t zero_point = 0;
 217   float scale = 0;
 218   const T qmin = std::numeric_limits<T>::lowest();
 219   const T qmax = std::numeric_limits<T>::max();
 220   const float qmin_double = qmin;
 221   const float qmax_double = qmax;
 222   // 0 should always be a representable value. Let's assume that the initial
 223   // min,max range contains 0.
 224   assert(f_max >= 0);
 225   assert(f_min <= 0);
 226   if (f_min == f_max)
 227   {
 228     // Special case where the min,max range is a point. Should be {0}.
 229     assert(f_max == 0);
 230     assert(f_min == 0);
 231     return {scale, zero_point};
 232   }
 233
 234   // General case.
 235   //
 236   // First determine the scale.
 237   scale = (f_max - f_min) / (qmax_double - qmin_double);
 238
 239   // Zero-point computation.
 240   // First the initial floating-point computation. The zero-point can be
 241   // determined from solving an affine equation for any known pair
 242   // (real value, corresponding quantized value).
 243   // We know two such pairs: (rmin, qmin) and (rmax, qmax).
 244   // The arithmetic error on the zero point computed from either pair
 245   // will be roughly machine_epsilon * (sum of absolute values of terms)
 246   // so we want to use the variant that adds the smaller terms.
 247   const float zero_point_from_min = qmin_double - f_min / scale;
 248   const float zero_point_from_max = qmax_double - f_max / scale;
 249
 250   const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
 251
 252   const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
 253
 254   const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
 255                                     ? zero_point_from_min
 256                                     : zero_point_from_max;
 257
 258   // Now we need to nudge the zero point to be an integer
 259   // (our zero points are integer, and this is motivated by the requirement
 260   // to be able to represent the real value "0" exactly as a quantized value,
 261   // which is required in multiple places, for example in Im2col with SAME
 262   //  padding).
 263
 264   T nudged_zero_point = 0;
 265   if (zero_point_double < qmin_double)
 266   {
 267     nudged_zero_point = qmin;
 268   }
 269   else if (zero_point_double > qmax_double)
 270   {
 271     nudged_zero_point = qmax;
 272   }
 273   else
 274   {
 275     nudged_zero_point = static_cast<T>(std::round(zero_point_double));
 276   }
 277
 278   // The zero point should always be in the range of quantized value,
 279   // // [qmin, qmax].
 280   assert(qmax >= nudged_zero_point);
 281   assert(qmin <= nudged_zero_point);
 282   zero_point = nudged_zero_point;
 283   // finally, return the values
 284   return {scale, zero_point};
 285 }
 286
 287 inline float getTolerance(float min, float max, int quantize_steps)
 288 {
 289   return ((max - min) / quantize_steps);
 290 }
 291
 292 } // namespace testing
 293 } // namespace kernels
 294 } // namespace luci_interpreter
 295
 296 #endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H