2 // Copyright (c) 2019 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "pooling_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "kernel_selector_helper.h"
22 #include "pooling/pooling_kernel_selector.h"
23 #include "pooling/pooling_kernel_base.h"
25 namespace cldnn { namespace gpu {
29 void validate_args(const pooling_node& arg)
31 auto const& input_buffer_size = arg.input().get_output_layout().get_buffer_size();
32 auto const& input_dimensions = input_buffer_size.batch.size() + input_buffer_size.feature.size() + input_buffer_size.spatial.size();
33 auto const& output_buffer_size = arg.get_output_layout().get_buffer_size();
34 auto const& output_dimensions = output_buffer_size.batch.size() + output_buffer_size.feature.size() + output_buffer_size.spatial.size();
35 auto& stride = arg.get_primitive()->stride;
36 auto const& stride_dimensions = stride.batch.size() + stride.feature.size() + stride.spatial.size();
37 auto& window = arg.get_primitive()->size;
38 auto const& window_dimensions = window.batch.size() + window.feature.size() + window.spatial.size();
40 CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_dimensions, "output dimensions", output_dimensions, "");
41 CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_dimensions, "output dimensions", output_dimensions, "");
42 CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_dimensions, "output dimensions", output_dimensions, "");
45 kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode)
49 case pooling_mode::max:
50 return kernel_selector::pool_type::MAX;
51 case pooling_mode::average:
52 return kernel_selector::pool_type::AVG;
53 case pooling_mode::average_no_padding:
54 return kernel_selector::pool_type::AVG;
55 case pooling_mode::max_with_argmax:
56 return kernel_selector::pool_type::MAX_WITH_ARGMAX;
59 return kernel_selector::pool_type::MAX;
63 kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode)
67 case pooling_mode::max:
68 case pooling_mode::max_with_argmax:
69 return kernel_selector::kernel_divider_mode::DONT_CARE;
70 case pooling_mode::average:
71 return kernel_selector::kernel_divider_mode::FIXED;
72 case pooling_mode::average_no_padding:
73 return kernel_selector::kernel_divider_mode::DYNAMIC;
76 return kernel_selector::kernel_divider_mode::DONT_CARE;
// GPU implementation of the pooling primitive, built on the common
// typed_primitive_gpu_impl machinery (kernel selection + execution).
struct pooling_gpu : typed_primitive_gpu_impl<pooling>
using parent = typed_primitive_gpu_impl<pooling>;
// Builds the kernel argument list for one execution. Starts from the base
// implementation's arguments, then — when the primitive carries an argmax
// output id (max_with_argmax mode) — appends dependency 1 (the argmax
// buffer) as an extra kernel input.
virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<pooling>& instance, int32_t split) const override
kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
if(!instance.argument.argmax.empty())
args.inputs.push_back(&instance.dep_memory(1));
// Factory: translates a pooling_node into kernel-selector parameters, asks
// the pooling kernel selector for the best matching kernel, and wraps it in
// a pooling_gpu implementation. Registered below for each supported
// (engine, data type, format) combination.
static primitive_impl* create(const pooling_node& arg)
auto pool_params = get_default_params<kernel_selector::pooling_params>(arg);
auto pool_optional_params = get_default_optional_params<kernel_selector::pooling_optional_params>(arg.get_program());
const auto primitive = arg.get_primitive();
const auto& stride = primitive->stride;
const auto& input_offset = primitive->input_offset;
const auto& input_sizes = arg.input().get_output_layout().size;
const auto& output_sizes = arg.get_output_layout().size;
auto& pp = pool_params;
pp.poolType = cldnn_2_pool_type(primitive->mode);
pp.remainderAction = kernel_selector::pool_remainder::CEIL;
// Global pooling: the window covers the whole spatial extent of the input.
// NOTE(review): this writes through the shared primitive descriptor rather
// than a local copy — mutates shared state; confirm this is intentional.
if (primitive->global_pooling) {
primitive->size.spatial[0] = input_sizes.spatial[0];
primitive->size.spatial[1] = input_sizes.spatial[1];
//check if last pooling window goes outside of input size + padding. If so the avg pooling size will be adjusted to that.
auto dynamic_mode = (((output_sizes.spatial[0] - 1) * stride.spatial[0]) + primitive->size.spatial[0]) > -2 * input_offset.spatial[0] + input_sizes.spatial[0] ||
(((output_sizes.spatial[1] - 1) * stride.spatial[1]) + primitive->size.spatial[1]) > -2 * input_offset.spatial[1] + input_sizes.spatial[1];
// Average pooling whose last window overhangs must divide by the padded
// element count; otherwise use the mode's default divider.
if (primitive->mode == pooling_mode::average && dynamic_mode)
pp.divMode = kernel_selector::kernel_divider_mode::DYNAMIC_WITH_PADDING;
pp.divMode = cldnn_2_kernel_divider_mode(primitive->mode);
// A positive input offset shifts the data start inside the buffer; rebuild
// the input tensor descriptor with that extra offset applied.
const auto additional_offset = tensor::max(input_offset, 0);
if (additional_offset != 0)
const auto& input_layout = arg.input().get_output_layout();
pool_params.inputs[0] = convert_data_tensor(input_layout, 1, additional_offset);
// max_with_argmax consumes the argmax buffer as a second kernel input.
if (primitive->mode == pooling_mode::max_with_argmax)
pool_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout()));
// Window size (x, y).
(uint32_t)primitive->size.spatial[0],
(uint32_t)primitive->size.spatial[1],
// Negative input offsets become padding before the data (x, y).
(uint32_t)std::max(-input_offset.spatial[0], 0),
(uint32_t)std::max(-input_offset.spatial[1], 0)
// Stride (x, y).
(uint32_t)stride.spatial[0],
(uint32_t)stride.spatial[1]
auto& kernel_selector = kernel_selector::pooling_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(pool_params, pool_optional_params);
// No kernel supports this parameter combination — fail loudly.
CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
auto pool = new pooling_gpu(arg, best_kernels[0]);
// Register the OCL pooling implementation for every supported combination of
// data type and memory format; the program looks these up via
// implementation_map<pooling> when selecting an implementation for a node.
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), pooling_gpu::create);
// Block/interleaved int8 formats used by quantized models.
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), pooling_gpu::create);
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), pooling_gpu::create);