src/core/NEON/kernels/NENormalizationLayerKernel.cpp

   1 /*
   2  * Copyright (c) 2017 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
  25
  26 #include "arm_compute/core/Helpers.h"
  27 #include "arm_compute/core/NEON/NEMath.h"
  28 #include "arm_compute/core/TensorInfo.h"
  29 #include "arm_compute/core/Utils.h"
  30 #include "arm_compute/core/Validate.h"
  31 #include "arm_compute/core/Window.h"
  32
  33 using namespace arm_compute;
  34
  35 NENormalizationLayerKernel::NENormalizationLayerKernel()
  36     : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr), _norm_info(NormType::IN_MAP), _border_size()
  37 {
  38 }
  39
  40 BorderSize NENormalizationLayerKernel::border_size() const
  41 {
  42     return _border_size;
  43 }
  44
  45 void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info)
  46 {
  47     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
  48     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
  49     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
  50     ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
  51
  52     const unsigned int border_width = (norm_info.type() == NormType::IN_MAP) ? 3 : 0;
  53
  54     _input         = input;
  55     _input_squared = input_squared;
  56     _output        = output;
  57     _norm_info     = norm_info;
  58     _func          = (norm_info.type() == NormType::IN_MAP) ? &NENormalizationLayerKernel::normalize<0> : &NENormalizationLayerKernel::normalize<2>;
  59     _border_size   = BorderSize(0, border_width);
  60
  61     constexpr unsigned int num_elems_processed_per_iteration = 4;
  62     const unsigned int     num_elems_read_per_iteration      = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
  63
  64     // Configure window
  65     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
  66
  67     AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration);
  68     AccessWindowHorizontal input_squared_access(input_squared->info(), -_border_size.left, num_elems_read_per_iteration);
  69     AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
  70
  71     update_window_and_padding(win, input_access, input_squared_access, output_access);
  72
  73     output_access.set_valid_region(win, input->info()->valid_region());
  74
  75     INEKernel::configure(win);
  76 }
  77
  78 template <unsigned int dim>
  79 void NENormalizationLayerKernel::normalize(const Window &window)
  80 {
  81     Iterator input(_input, window);
  82     Iterator input_squared(_input_squared, window);
  83     Iterator output(_output, window);
  84
  85     const int radius               = _norm_info.norm_size() / 2;
  86     const int total_size           = _input->info()->dimension(dim) - 1;
  87     const int input_squared_stride = _input_squared->info()->strides_in_bytes()[dim];
  88     // We account padding when we normalize across X
  89     const int min_left  = (dim == 0) ? -static_cast<int>(border_size().left) : 0;
  90     const int max_right = (dim == 0) ? total_size + border_size().left : total_size;
  91
  92     const float32x4_t coeff_vec = vdupq_n_f32(_norm_info.scale_coeff());
  93     const float32x4_t beta_vec  = vdupq_n_f32(_norm_info.beta());
  94     const float32x4_t kappa_vec = vdupq_n_f32(_norm_info.kappa());
  95
  96     execute_window_loop(window, [&](const Coordinates & id)
  97     {
  98         // Get range to normalize
  99         const int current_slice = id[dim];
 100         const int first_slice   = std::max(current_slice - radius, min_left);
 101         const int last_slice    = std::min(current_slice + radius, max_right);
 102
 103         // Accumulate cross map values
 104         float32x4_t accu = vdupq_n_f32(0.f);
 105         for(int i = first_slice; i <= last_slice; ++i)
 106         {
 107             accu = vaddq_f32(accu, vld1q_f32(reinterpret_cast<float *>(input_squared.ptr() + (i - current_slice) * input_squared_stride)));
 108         }
 109
 110         // Normalize
 111         const float32x4_t normalized       = vpowq_f32(vmlaq_f32(kappa_vec, coeff_vec, accu), beta_vec);
 112         const float32x4_t normalized_pixel = vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr())), vinv_f32(normalized));
 113         vst1q_f32(reinterpret_cast<float *>(output.ptr()), normalized_pixel);
 114     },
 115     input, input_squared, output);
 116 }
 117
 118 void NENormalizationLayerKernel::run(const Window &window)
 119 {
 120     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
 121     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
 122     ARM_COMPUTE_ERROR_ON(_func == nullptr);
 123
 124     // Run function
 125     (this->*_func)(window);
 126 }