compiler/luci-interpreter/src/kernels/Mean.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #include "kernels/Mean.h"
  19
  20 #include "kernels/Utils.h"
  21
  22 #include <tensorflow/lite/kernels/internal/reference/reduce.h>
  23
  24 #include <stdexcept>
  25
  26 namespace luci_interpreter
  27 {
  28 namespace kernels
  29 {
  30
  31 static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
  32 {
  33   params->axis_count = num_axes;
  34   for (int i = 0; i < num_axes; ++i)
  35   {
  36     params->axis[i] = static_cast<int16>(axes_data[i]);
  37   }
  38   for (int i = num_axes; i < 4; ++i)
  39   {
  40     params->axis[i] = 1;
  41   }
  42 }
  43
  44 // Returns the number of axes that will be reduced. Removes duplicates.
  45 static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
  46 {
  47   int reduction_count = num_axes;
  48   for (int i = 0; i < num_axes; ++i)
  49   {
  50     int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
  51     assert(current >= 0 && current < input_num_dims);
  52     for (int j = 0; j < i; j++)
  53     {
  54       int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
  55       // This checks for duplicate axis
  56       if (current == previous)
  57       {
  58         --reduction_count;
  59         break;
  60       }
  61     }
  62   }
  63   return reduction_count;
  64 }
  65
  66 static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
  67                             bool keep_dims)
  68 {
  69   int input_num_dims = input_shape.num_dims();
  70   if (input_num_dims == 0)
  71   {
  72     return Shape(0);
  73   }
  74
  75   if (keep_dims)
  76   {
  77     Shape output_shape(input_num_dims);
  78     for (int idx = 0; idx < input_num_dims; ++idx)
  79     {
  80       bool is_axis = false;
  81       for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
  82       {
  83         if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
  84         {
  85           is_axis = true;
  86           break;
  87         }
  88       }
  89       if (is_axis)
  90       {
  91         output_shape.dim(idx) = 1;
  92       }
  93       else
  94       {
  95         output_shape.dim(idx) = input_shape.dim(idx);
  96       }
  97     }
  98     return output_shape;
  99   }
 100   else
 101   {
 102     int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
 103     Shape output_shape(input_num_dims - num_reduce_axes);
 104     int num_skip_axes = 0;
 105     for (int idx = 0; idx < input_num_dims; ++idx)
 106     {
 107       bool is_axis = false;
 108       for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
 109       {
 110         if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
 111         {
 112           ++num_skip_axes;
 113           is_axis = true;
 114           break;
 115         }
 116       }
 117       if (!is_axis)
 118       {
 119         output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
 120       }
 121     }
 122     return output_shape;
 123   }
 124 }
 125
 126 Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
 127            Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
 128   : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
 129                                     params)
 130 {
 131 }
 132
 133 void Mean::configure()
 134 {
 135   LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
 136   LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
 137   if (input()->element_type() == DataType::S16)
 138   {
 139     LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
 140   }
 141
 142   const Shape &input_shape = input()->shape();
 143   int input_num_dims = input_shape.num_dims();
 144
 145   const auto *axes_data = getTensorData<int32_t>(axes());
 146   int num_axes = axes()->shape().num_elements();
 147   assert(num_axes <= 4);
 148
 149   Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
 150   output()->resize(output_shape);
 151
 152   tflite::MeanParams params{};
 153   resolveAxes(axes_data, num_axes, &params);
 154   _need_temporaries = !(
 155     _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
 156     ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
 157   if (_need_temporaries)
 158   {
 159     auto temp_index = getOutputTensors()[1];
 160     auto resolved_axes = getOutputTensors()[2];
 161     auto temp_sum = getOutputTensors()[3];
 162
 163     temp_index->resize(Shape(input_num_dims));
 164     resolved_axes->resize(Shape(num_axes));
 165     temp_sum->resize(output()->shape());
 166   }
 167   else
 168   {
 169     auto temp_index = getOutputTensors()[1];
 170     auto resolved_axes = getOutputTensors()[2];
 171     auto temp_sum = getOutputTensors()[3];
 172
 173     temp_index->set_allocatable(false);
 174     resolved_axes->set_allocatable(false);
 175     temp_sum->set_allocatable(false);
 176   }
 177 }
 178
 179 void Mean::execute() const
 180 {
 181   switch (input()->element_type())
 182   {
 183     case DataType::FLOAT32:
 184       evalFloat();
 185       break;
 186     case DataType::U8:
 187       evalQuantized();
 188       break;
 189     case DataType::S16:
 190       evalQuantizedS16();
 191       break;
 192     default:
 193       throw std::runtime_error("Unsupported type.");
 194   }
 195 }
 196
 197 void Mean::evalFloat() const
 198 {
 199   const Shape &input_shape = input()->shape();
 200   int input_num_dims = input_shape.num_dims();
 201   const auto *axes_data = getTensorData<int32_t>(axes());
 202   int num_axes = axes()->shape().num_elements();
 203
 204   tflite::MeanParams params{};
 205   resolveAxes(axes_data, num_axes, &params);
 206
 207   auto temp_index = getOutputTensors()[1];
 208   auto resolved_axes = getOutputTensors()[2];
 209   auto temp_sum = getOutputTensors()[3];
 210
 211   // Defer to specialized implementation for 4D Mean across axes 1 & 2.
 212   if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
 213       ((params.axis[0] == 1 && params.axis[1] == 2) ||
 214        (params.axis[0] == 2 && params.axis[1] == 1)))
 215   {
 216     tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
 217                                 getTensorShape(output()), getTensorData<float>(output()));
 218   }
 219   else
 220   {
 221     tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
 222                                 input()->shape().num_dims(), getTensorData<float>(output()),
 223                                 getTensorShape(output()).DimsData(), output()->shape().num_dims(),
 224                                 axes_data, num_axes, _params.keep_dims,
 225                                 getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
 226                                 getTensorData<float>(temp_sum));
 227   }
 228 }
 229
 230 void Mean::evalQuantized() const
 231 {
 232   const Shape &input_shape = input()->shape();
 233   int input_num_dims = input_shape.num_dims();
 234   const auto *axes_data = getTensorData<int32_t>(axes());
 235   int num_axes = axes()->shape().num_elements();
 236
 237   tflite::MeanParams params{};
 238   resolveAxes(axes_data, num_axes, &params);
 239
 240   auto temp_index = getOutputTensors()[1];
 241   auto resolved_axes = getOutputTensors()[2];
 242   auto temp_sum = getOutputTensors()[3];
 243
 244   // Defer to specialized implementation for 4D Mean across axes 1 & 2.
 245   if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
 246       ((params.axis[0] == 1 && params.axis[1] == 2) ||
 247        (params.axis[0] == 2 && params.axis[1] == 1)))
 248   {
 249     tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
 250                                 input()->zero_point(), input()->scale(), getTensorShape(output()),
 251                                 getTensorData<uint8_t>(output()), output()->zero_point(),
 252                                 output()->scale());
 253   }
 254   else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
 255   {
 256     tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
 257                                 input()->shape().num_dims(), getTensorData<uint8_t>(output()),
 258                                 getTensorShape(output()).DimsData(), output()->shape().num_dims(),
 259                                 axes_data, num_axes, _params.keep_dims,
 260                                 getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
 261                                 getTensorData<int>(temp_sum));
 262   }
 263   else
 264   {
 265     tflite::reference_ops::QuantizedMeanOrSum<>(
 266       getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
 267       getTensorShape(input()).DimsData(), input()->shape().num_dims(),
 268       getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
 269       getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
 270       _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
 271       getTensorData<int>(temp_sum),
 272       /*compute_sum=*/false);
 273   }
 274 }
 275
 276 void Mean::evalQuantizedS16() const
 277 {
 278   const auto *input_data = getTensorData<int16_t>(input());
 279   auto *output_data = getTensorData<int16_t>(output());
 280
 281   const Shape &input_shape = input()->shape();
 282   const Shape &output_shape = output()->shape();
 283
 284   const auto *axes_data = getTensorData<int32_t>(axes());
 285   const int num_axes = axes()->shape().num_elements();
 286
 287   constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
 288   constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
 289
 290   // Defer to specialized implementation for 4D Mean across axes 1 & 2.
 291   if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
 292       ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
 293   {
 294     const int32_t batches = input_shape.dim(0);
 295     const int32_t input_height = input_shape.dim(1);
 296     const int32_t input_width = input_shape.dim(2);
 297     const int32_t depth = input_shape.dim(3);
 298     assert(output_shape.num_dims() == 4);
 299     assert(output_shape.dim(0) == batches);
 300     assert(output_shape.dim(1) == 1);
 301     assert(output_shape.dim(2) == 1);
 302     assert(output_shape.dim(3) == depth);
 303
 304     const double real_multiplier =
 305       static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
 306
 307     int32_t output_multiplier{};
 308     int output_shift{};
 309     quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
 310
 311     const int32_t num_elements_in_axes = input_height * input_width;
 312
 313     for (int32_t batch = 0; batch < batches; ++batch)
 314     {
 315       for (int32_t c = 0; c < depth; ++c)
 316       {
 317         int32_t acc = 0;
 318         for (int32_t in_y = 0; in_y < input_height; ++in_y)
 319         {
 320           for (int32_t in_x = 0; in_x < input_width; ++in_x)
 321           {
 322             acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
 323           }
 324         }
 325         int32_t scaled_acc =
 326           tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
 327         // Divide by the number of elements rounding to the nearest integer.
 328         scaled_acc = scaled_acc > 0
 329                        ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
 330                        : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
 331
 332         scaled_acc = std::max(scaled_acc, output_min);
 333         scaled_acc = std::min(scaled_acc, output_max);
 334
 335         output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
 336       }
 337     }
 338   }
 339   else
 340   {
 341     throw std::runtime_error("Unsupported configuration.");
 342   }
 343 }
 344
 345 } // namespace kernels
 346 } // namespace luci_interpreter