--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class CLNormalizationLayerExKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLNormalizationLayerExKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete;
+ /** Default Move Constructor. */
+ CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default;
+ /** Default move assignment operator */
+ CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
+ * [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported:
+ * QS8/QS16/F16/F32.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as
+ * input. Data types supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type,
+ * normalization size and other parameters.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLNormalizationLayerExKernel
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
+ * [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported:
+ * QS8/QS16/F16/F32.
+ * @param[in] output Destination tensor. Output will have the same number of dimensions as
+ * input. Data types supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization
+ * size and other parameters.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ NormalizationLayerInfo norm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;    /**< Source tensor (not owned); set in configure() */
+ ICLTensor *_output;         /**< Destination tensor (not owned); set in configure() */
+ BorderSize _border_size;    /**< Horizontal border required for in-map normalization reads */
+ bool _is_in_map;            /**< True for IN_MAP normalization, false for CROSS_MAP */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
+#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute a normalization layer. This function calls the following CL kernels:
+ *
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLNormalizationLayerExKernel
+ *
+ */
+class CLNormalizationLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLNormalizationLayerEx();
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions
+ * [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types
+ * supported: QS8/QS16/F16/F32 (Written to by the border handler)
+ * @param[out] output Destination tensor. Dimensions, data type and number of channels must
+ * match the input ones.
+ * @param[in] norm_info Normalization layer information like the normalization type,
+ * normalization size and other parameters.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLNormalizationLayerEx
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
+ * [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported:
+ * QS8/QS16/F16/F32
+ * @param[in] output Destination tensor. Dimensions, data type and number of channels must
+ * match the input ones.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization
+ * size and other parameters.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const NormalizationLayerInfo &norm_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLNormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel to run */
+ CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
+};
+}
+#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/FixedPoint.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+namespace
+{
+// Validates the input/output tensor infos and the normalization parameters.
+// Returns an error status instead of asserting so it can back both configure()
+// (via ARM_COMPUTE_ERROR_THROW_ON) and the static validate() entry point.
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+ NormalizationLayerInfo norm_info)
+{
+ // Only QS8/QS16 fixed-point and F16/F32 float inputs are supported.
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16,
+ DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+
+ // For fixed-point inputs every normalization coefficient must be representable
+ // in the input's fixed-point format, otherwise the kernel would overflow.
+ if (is_data_type_fixed_point(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
+ ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
+ ARM_COMPUTE_RETURN_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(),
+ input);
+ }
+
+ // Checks performed when output is configured (total_size() == 0 means the output
+ // info is still empty and will be auto-initialized from the input later).
+ if (output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+ }
+
+ return Status{};
+}
+
+// Computes the execution window and required paddings for the kernel.
+// Shared by configure() (which applies the window) and validate() (which only
+// checks that the existing padding is sufficient).
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
+ NormalizationLayerInfo norm_info)
+{
+ // Output tensor auto initialization if not yet initialized
+ auto_init_if_empty(*output, *input->clone());
+
+ const unsigned int norm_size = norm_info.norm_size();
+ bool is_in_map = norm_info.is_in_map();
+
+ // In-map normalization reads up to norm_size/2 neighbours on each side, so a
+ // horizontal border is needed; it is clamped to 3 elements per side. Cross-map
+ // normalization reads along the channel axis only, so no border is required.
+ const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
+ const BorderSize border_size = BorderSize(0, border_width);
+
+ // 16 elements per work-item for 8/16-bit fixed-point types, 4 for float.
+ const unsigned int num_elems_processed_per_iteration =
+ (is_data_type_fixed_point(input->data_type())) ? 16 : 4;
+ // In-map kernels additionally read the normalization radius on both sides.
+ const unsigned int num_elems_read_per_iteration =
+ is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2))
+ : num_elems_processed_per_iteration;
+
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+
+ // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside
+ // the kernel, avoiding padding
+ AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+ bool window_changed = update_window_and_padding(win, input_access, output_access);
+
+ output_access.set_valid_region(win, input->valid_region());
+
+ // If the window had to change, the caller's tensors do not carry enough padding.
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+// Members start detached with a zero border; the real values are set in configure().
+CLNormalizationLayerExKernel::CLNormalizationLayerExKernel()
+ : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false)
+{
+}
+
+// Border computed in configure(): zero for cross-map, a horizontal border of up
+// to 3 elements per side for in-map normalization.
+BorderSize CLNormalizationLayerExKernel::border_size() const { return _border_size; }
+
+// Validates the tensors, builds the OpenCL kernel with compile-time parameters
+// baked in as -D defines, and configures the execution window.
+void CLNormalizationLayerExKernel::configure(const ICLTensor *input, ICLTensor *output,
+ NormalizationLayerInfo norm_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Output tensor auto initialization if not yet initialized
+ auto_init_if_empty(*output->info(), *input->info()->clone());
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info));
+
+ _input = input;
+ _output = output;
+
+ // In-map normalization needs a horizontal border (radius clamped to 3 elements);
+ // cross-map normalization needs none. Must match validate_and_configure_window().
+ _is_in_map = norm_info.is_in_map();
+ const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
+ _border_size = BorderSize(0, border_width);
+
+ // 16 elements per work-item for fixed-point types, 4 for float.
+ const unsigned int num_elems_processed_per_iteration =
+ (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4;
+ const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
+
+ // Set build options: normalization coefficients and sizes are compile-time
+ // constants of the generated OpenCL program.
+ CLBuildOptions build_opts;
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()),
+ "-DFIXED_POINT_POSITION=" +
+ support::cpp11::to_string(input->info()->fixed_point_position()));
+ build_opts.add_option(
+ ("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
+ build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
+ build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
+ build_opts.add_option(
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2)));
+ build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
+ build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
+
+ // Create kernel: one OpenCL kernel per normalization direction.
+ std::string kernel_name =
+ _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map";
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure(win_config.second);
+
+ // Set config_id for enabling LWS tuning: data type, norm type, norm size and
+ // the first two input dimensions uniquely identify a tuning configuration.
+ _config_id = "normalization_layer_";
+ _config_id += lower_string(string_from_data_type(input->info()->data_type()));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(
+ static_cast<std::underlying_type<NormType>::type>(norm_info.type()));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(norm_info.norm_size());
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(input->info()->dimension(0));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(input->info()->dimension(1));
+}
+
+// Static validation: runs the argument checks and the window/padding computation
+// on clones, so the caller's tensor infos are never modified.
+Status CLNormalizationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
+ NormalizationLayerInfo norm_info)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
+
+ return Status{};
+}
+
+// Enqueues the kernel once per 3D slice of the (possibly collapsed) window.
+void CLNormalizationLayerExKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ // For in-map normalization collapse only up to DimZ; for cross-map, up to
+ // dimension 4 (presumably so the batch dimension is folded in as well —
+ // NOTE(review): confirm the intended upper dimension for the cross-map case).
+ const int collapsed_dimension = _is_in_map ? Window::DimZ : 4;
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension);
+ Window slice = window_collapsed.first_slice_window_3D();
+
+ // Bind input/output for each slice and enqueue until the window is exhausted.
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice);
+ add_3D_tensor_argument(idx, _output, slice);
+ enqueue(queue, *this, slice);
+ } while (window_collapsed.slide_window_slice_3D(slice));
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {}
+
+// Configures the normalization kernel and the border handler that fills the
+// horizontal border the in-map kernel reads past the valid region.
+void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
+ const NormalizationLayerInfo &norm_info)
+{
+ ARM_COMPUTE_ERROR_ON(input == nullptr);
+
+ // Configure normalization kernel (also validates input/output/norm_info).
+ _norm_kernel.configure(input, output, norm_info);
+
+ // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
+ _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+}
+
+// Thin wrapper: the kernel's static validate covers all checks for this function.
+Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const NormalizationLayerInfo &norm_info)
+{
+ return CLNormalizationLayerExKernel::validate(input, output, norm_info);
+}
+
+// Enqueues the border fill first (no flush), then the normalization kernel.
+void CLNormalizationLayerEx::run()
+{
+ // Run border handler; 'false' defers the queue flush until the next enqueue.
+ CLScheduler::get().enqueue(_border_handler, false);
+
+ // Run normalization kernel
+ CLScheduler::get().enqueue(_norm_kernel);
+}
#include <arm_compute/runtime/CL/functions/CLRNNLayer.h>
#include <arm_compute/runtime/CL/functions/CLFloor.h>
#include <arm_compute/runtime/CL/functions/CLCopy.h>
-#include <arm_compute/runtime/CL/functions/CLNormalizationLayer.h>
+#include <arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLExp.h>
#include <arm_compute/runtime/CL/functions/CLSquaredDifference.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>();
+ auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayerEx>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
param.alpha, param.beta, param.bias, false);
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>();
+ auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayerEx>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);