From: Shubham Gupta/System SW /SRI-Bangalore/Engineer/삼성전자 <shub98.gupta@samsung.com>
Date: Wed, 17 Oct 2018 10:38:13 +0000 (+0530)
Subject: Add LRN codes to support both odd & even values (#2906)
X-Git-Tag: 0.3~593
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9363adfe5a18d15748e1d775b3c568007197d443;p=platform%2Fcore%2Fml%2Fnnfw.git

Add LRN codes to support both odd & even values (#2906)

This patch contains RS7-RuntimeNTools/ComputeLibrary#47 PR
Updated license contents for CLNormalizationLayerEx

Signed-off-by: shubham <shub98.gupta@samsung.com>
---

diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h
new file mode 100644
index 0000000..f782d2c
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the normalization layer kernel (Ex variant; per the introducing commit it is
+ * meant to support both odd and even normalization sizes). */
+class CLNormalizationLayerExKernel : public ICLKernel
+{
+public:
+  /** Constructor */
+  CLNormalizationLayerExKernel();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete;
+  /** Default Move Constructor. */
+  CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default;
+  /** Default move assignment operator */
+  CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default;
+  /** Set the input and output tensors.
+   *
+   * @param[in]  input     Source tensor. 3 lower dims represent a single input with dimensions
+   *                       [width, height, IFM], and an optional 4th dimension for batch of
+   *                       inputs. Must not be null (asserted together with @p output).
+   *                       Data types supported: QS8/QS16/F16/F32.
+   * @param[out] output    Destination tensor; auto-initialized from @p input when empty.
+   *                       Data types supported: same as @p input.
+   * @param[in]  norm_info Normalization layer information like the normalization type,
+   *                       normalization size and other parameters.
+   */
+  void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * CLNormalizationLayerExKernel
+   *
+   * @param[in] input     Source tensor. 3 lower dims represent a single input with dimensions
+   *                      [width, height, IFM], and an optional 4th dimension for batch of
+   *                      inputs.
+   *                      Data types supported: QS8/QS16/F16/F32.
+   * @param[in] output    Destination tensor. Checked against @p input (data type, shape,
+   *                      fixed point) only when already configured, i.e. total size != 0.
+   * @param[in] norm_info Normalization layer information like the normalization type,
+   *                      normalization size and other parameters.
+   *
+   * @return a status (an empty Status when all checks pass)
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+                         NormalizationLayerInfo norm_info);
+
+  // Inherited methods overridden:
+  void run(const Window &window, cl::CommandQueue &queue) override;
+  BorderSize border_size() const override;
+
+private:
+  const ICLTensor *_input; /**< Source tensor bound in configure() */
+  ICLTensor *_output;      /**< Destination tensor bound in configure() */
+  BorderSize _border_size; /**< Border derived from the normalization size in configure() */
+  bool _is_in_map;         /**< Cached norm_info.is_in_map(); selects kernel and collapse dim */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h
new file mode 100644
index 0000000..15797c9
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
+#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute a normalization layer. This function calls the following CL kernels:
+ *
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLNormalizationLayerExKernel
+ *
+ */
+class CLNormalizationLayerEx : public IFunction
+{
+public:
+  /** Default constructor */
+  CLNormalizationLayerEx();
+  /** Set the input and output tensors.
+   *
+   * @param[in, out] input     Source tensor. 3 lower dims represent a single input with dimensions
+   *                           [width, height, IFM], and an optional 4th dimension for batch of
+   *                           inputs. Data types supported: QS8/QS16/F16/F32
+   *                           (Written to by the border handler)
+   * @param[out]     output    Destination tensor. Dimensions, data type and number of channels
+   *                           must match the input ones.
+   * @param[in]      norm_info Normalization layer information like the normalization type,
+   *                           normalization size and other parameters.
+   */
+  void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * CLNormalizationLayerEx
+   *
+   * @param[in] input     Source tensor. 3 lower dims represent a single input with dimensions
+   *                      [width, height, IFM], and an optional 4th dimension for batch of
+   *                      inputs.
+   *                      Data types supported: QS8/QS16/F16/F32
+   * @param[in] output    Destination tensor. Dimensions, data type and number of channels must
+   *                      match the input ones.
+   * @param[in] norm_info Normalization layer information like the normalization type,
+   *                      normalization size and other parameters.
+   *
+   * @return a status (the result of CLNormalizationLayerExKernel::validate)
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+                         const NormalizationLayerInfo &norm_info);
+
+  // Inherited methods overridden:
+  void run() override;
+
+private:
+  CLNormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel to run */
+  CLFillBorderKernel _border_handler;        /**< Kernel to handle borders */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp
new file mode 100644
index 0000000..9ff5401
--- /dev/null
+++ b/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/FixedPoint.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+namespace
+{ // File-local helpers shared by configure() and validate()
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+                          NormalizationLayerInfo norm_info)
+{ // Validates tensor infos against norm_info; returns an empty Status when every check passes
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16,
+                                                       DataType::F16, DataType::F32);
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+  // Fixed-point inputs: beta, kappa and scale_coeff must be representable in the input format
+  if (is_data_type_fixed_point(input->data_type()))
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
+    ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
+    ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(),
+                                                                       input);
+  }
+
+  // Checks performed only when output is already configured (non-zero total size)
+  if (output->total_size() != 0)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+  }
+  // No parity check on norm_info.norm_size(): odd and even sizes both pass validation
+  return Status{};
+}
+/** Computes the execution window and padding requirements; reports insufficient padding. */
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
+                                                        NormalizationLayerInfo norm_info)
+{
+  // Output tensor auto initialization if not yet initialized
+  auto_init_if_empty(*output, *input->clone());
+
+  const unsigned int norm_size = norm_info.norm_size();
+  bool is_in_map = norm_info.is_in_map();
+  // In-map border: radius (norm_size / 2) capped at 3 elements; no border when not in-map
+  const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
+  const BorderSize border_size = BorderSize(0, border_width);
+  // 16 elements per iteration for fixed-point data, 4 otherwise; in-map reads add 2 * radius
+  const unsigned int num_elems_processed_per_iteration =
+      (is_data_type_fixed_point(input->data_type())) ? 16 : 4;
+  const unsigned int num_elems_read_per_iteration =
+      is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2))
+                : num_elems_processed_per_iteration;
+
+  Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+
+  // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside
+  // the kernel, avoiding padding
+  AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration);
+  AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+  // NOTE(review): read start is -border (<= 3) but width adds 2 * radius; verify radius > 3
+  bool window_changed = update_window_and_padding(win, input_access, output_access);
+
+  output_access.set_valid_region(win, input->valid_region());
+  // A changed window is reported as a RUNTIME_ERROR ("Insufficient Padding!")
+  Status err = (window_changed)
+                   ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+                   : Status{};
+  return std::make_pair(err, win);
+}
+} // namespace
+/** Default constructor: no tensors bound, zero border, in-map flag cleared. */
+CLNormalizationLayerExKernel::CLNormalizationLayerExKernel()
+    : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false)
+{
+}
+/** Returns the border size stored by configure(). */
+BorderSize CLNormalizationLayerExKernel::border_size() const { return _border_size; }
+/** Binds tensors, derives the border, builds the OpenCL kernel and configures its window. */
+void CLNormalizationLayerExKernel::configure(const ICLTensor *input, ICLTensor *output,
+                                             NormalizationLayerInfo norm_info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  // Output tensor auto initialization if not yet initialized
+  auto_init_if_empty(*output->info(), *input->info()->clone());
+
+  // Perform validation step
+  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info));
+
+  _input = input;
+  _output = output;
+  // Border width: radius (norm_size / 2) capped at 3 for in-map normalization, 0 otherwise
+  _is_in_map = norm_info.is_in_map();
+  const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
+  _border_size = BorderSize(0, border_width);
+  // Vector width passed to the kernel as VEC_SIZE: 16 for fixed-point data, 4 otherwise
+  const unsigned int num_elems_processed_per_iteration =
+      (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4;
+  const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
+
+  // Set build options (RADIUS is norm_size / 2, integer division; NUM_SLICES is dimension 2)
+  CLBuildOptions build_opts;
+  build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+  build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()),
+                           "-DFIXED_POINT_POSITION=" +
+                               support::cpp11::to_string(input->info()->fixed_point_position()));
+  build_opts.add_option(
+      ("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
+  build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
+  build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
+  build_opts.add_option(
+      ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+  build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2)));
+  build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
+  build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
+
+  // Create kernel: in-map or cross-map variant, chosen from norm_info.is_in_map()
+  std::string kernel_name =
+      _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map";
+  _kernel = static_cast<cl::Kernel>(
+      CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+
+  // Configure kernel window
+  auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
+  ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+  ICLKernel::configure(win_config.second);
+
+  // Set config_id for LWS tuning: normalization_layer_<dtype>_<type>_<size>_<dim0>_<dim1>
+  _config_id = "normalization_layer_";
+  _config_id += lower_string(string_from_data_type(input->info()->data_type()));
+  _config_id += "_";
+  _config_id += support::cpp11::to_string(
+      static_cast<std::underlying_type<NormType>::type>(norm_info.type()));
+  _config_id += "_";
+  _config_id += support::cpp11::to_string(norm_info.norm_size());
+  _config_id += "_";
+  _config_id += support::cpp11::to_string(input->info()->dimension(0));
+  _config_id += "_";
+  _config_id += support::cpp11::to_string(input->info()->dimension(1));
+}
+/** Static check: argument validation, then window configuration run on cloned tensor infos. */
+Status CLNormalizationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                              NormalizationLayerInfo norm_info)
+{
+  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info));
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
+
+  return Status{};
+}
+/** Enqueues the kernel once per 3D slice of the (possibly collapsed) execution window. */
+void CLNormalizationLayerExKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+  // Dimension passed to collapse_if_possible(): DimZ for in-map normalization, 4 otherwise
+  const int collapsed_dimension = _is_in_map ? Window::DimZ : 4;
+  Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension);
+  Window slice = window_collapsed.first_slice_window_3D();
+  // The same two tensor arguments are set for every 3D slice before enqueueing
+  do
+  {
+    unsigned int idx = 0; // argument index; presumably advanced by each add_*_argument call
+    add_3D_tensor_argument(idx, _input, slice);
+    add_3D_tensor_argument(idx, _output, slice);
+    enqueue(queue, *this, slice);
+  } while (window_collapsed.slide_window_slice_3D(slice));
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
new file mode 100644
index 0000000..68ccdc8
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+/** Default constructor; value-initializes the normalization kernel and the border handler. */
+CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {}
+/** Configures the normalization kernel, then the border handler with that kernel's border. */
+void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
+                                       const NormalizationLayerInfo &norm_info)
+{
+  ARM_COMPUTE_ERROR_ON(input == nullptr);
+
+  // Configure normalization kernel
+  _norm_kernel.configure(input, output, norm_info);
+
+  // Zero-fill the border requested by the kernel (at most 3 elements, used by vload4 in IN_MAP)
+  _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+}
+/** Static validation; forwards to CLNormalizationLayerExKernel::validate(). */
+Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                        const NormalizationLayerInfo &norm_info)
+{
+  return CLNormalizationLayerExKernel::validate(input, output, norm_info);
+}
+/** Enqueues the border handler, then the normalization kernel. */
+void CLNormalizationLayerEx::run()
+{
+  // Run border handler (flag false: presumably skips the flush here - confirm)
+  CLScheduler::get().enqueue(_border_handler, false);
+
+  // Run normalization kernel
+  CLScheduler::get().enqueue(_norm_kernel);
+}
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 588a284..e181129 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -48,7 +48,7 @@
 #include <arm_compute/runtime/CL/functions/CLRNNLayer.h>
 #include <arm_compute/runtime/CL/functions/CLFloor.h>
 #include <arm_compute/runtime/CL/functions/CLCopy.h>
-#include <arm_compute/runtime/CL/functions/CLNormalizationLayer.h>
+#include <arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h>
 #include <arm_compute/runtime/CL/functions/CLExp.h>
 #include <arm_compute/runtime/CL/functions/CLSquaredDifference.h>
 #include <arm_compute/runtime/CL/functions/CLNeg.h>
@@ -3830,7 +3830,7 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
 
     if (::internal::arm_compute::isGpuMode())
     {
-      auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>();
+      auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayerEx>();
 
       fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
 
@@ -4255,7 +4255,7 @@ void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::No
                                               param.alpha, param.beta, param.bias, false);
     if (::internal::arm_compute::isGpuMode())
     {
-      auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>();
+      auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayerEx>();
 
       fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);