inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "pooling_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "error_handler.h"
#include "kernel_selector_helper.h"
#include "pooling/pooling_kernel_selector.h"
#include "pooling/pooling_kernel_base.h"

namespace cldnn { namespace gpu {

namespace
{
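    // Checks that the input, output, stride and pooling window tensors all describe
    // the same number of dimensions.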
    void validate_args(const pooling_node& arg)
    {
        auto const& input_buffer_size = arg.input().get_output_layout().get_buffer_size();
        auto const& input_dimensions = input_buffer_size.batch.size() + input_buffer_size.feature.size() + input_buffer_size.spatial.size();
        auto const& output_buffer_size = arg.get_output_layout().get_buffer_size();
        auto const& output_dimensions = output_buffer_size.batch.size() + output_buffer_size.feature.size() + output_buffer_size.spatial.size();
        auto& stride = arg.get_primitive()->stride;
        auto const& stride_dimensions = stride.batch.size() + stride.feature.size() + stride.spatial.size();
        auto& window = arg.get_primitive()->size;
        auto const& window_dimensions = window.batch.size() + window.feature.size() + window.spatial.size();

        CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_dimensions, "output dimensions", output_dimensions, "");
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_dimensions, "output dimensions", output_dimensions, "");
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_dimensions, "output dimensions", output_dimensions, "");
    }

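    // Maps the clDNN pooling mode to the kernel selector pool type. Note that average and
    // average_no_padding both map to AVG; the difference is expressed via the divider mode below.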
    kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode)
    {
        switch (mode)
        {
        case pooling_mode::max:
            return kernel_selector::pool_type::MAX;
        case pooling_mode::average:
            return kernel_selector::pool_type::AVG;
        case pooling_mode::average_no_padding:
            return kernel_selector::pool_type::AVG;
        case pooling_mode::max_with_argmax:
            return kernel_selector::pool_type::MAX_WITH_ARGMAX;
        default:
            assert(0);
            return kernel_selector::pool_type::MAX;
        }
    }

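    // Maps the pooling mode to the averaging divider mode: FIXED divides by the full window size,
    // DYNAMIC only by the elements that fall inside the input (average_no_padding); max pooling
    // does not divide at all.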
    kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode)
    {
        switch (mode)
        {
        case pooling_mode::max:
        case pooling_mode::max_with_argmax:
            return kernel_selector::kernel_divider_mode::DONT_CARE;
        case pooling_mode::average:
            return kernel_selector::kernel_divider_mode::FIXED;
        case pooling_mode::average_no_padding:
            return kernel_selector::kernel_divider_mode::DYNAMIC;
        default:
            assert(0);
            return kernel_selector::kernel_divider_mode::DONT_CARE;
        }
    }
}

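// GPU implementation of the pooling primitive, built on the common typed_primitive_gpu_impl base.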
struct pooling_gpu : typed_primitive_gpu_impl<pooling>
{
    using parent = typed_primitive_gpu_impl<pooling>;
    using parent::parent;

protected:

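    // For max_with_argmax pooling the argmax buffer (dependency 1) is passed to the kernel
    // as an additional input, alongside the arguments set up by the base class.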
    virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<pooling>& instance, int32_t split) const override
    {
        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
        if (!instance.argument.argmax.empty())
            args.inputs.push_back(&instance.dep_memory(1));
        return args;
    }
public:

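    // Translates the pooling primitive into kernel selector parameters and picks the
    // best available GPU kernel for them.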
    static primitive_impl* create(const pooling_node& arg)
    {
        validate_args(arg);

        auto pool_params            = get_default_params<kernel_selector::pooling_params>(arg);
        auto pool_optional_params   = get_default_optional_params<kernel_selector::pooling_optional_params>(arg.get_program());

        const auto primitive        = arg.get_primitive();
        const auto& stride          = primitive->stride;
        const auto& input_offset    = primitive->input_offset;
        const auto& input_sizes     = arg.input().get_output_layout().size;
        const auto& output_sizes    = arg.get_output_layout().size;

        auto& pp                    = pool_params;

        pp.poolType                 = cldnn_2_pool_type(primitive->mode);
        pp.remainderAction          = kernel_selector::pool_remainder::CEIL;
        if (primitive->global_pooling) {
            primitive->size.spatial[0] = input_sizes.spatial[0];
            primitive->size.spatial[1] = input_sizes.spatial[1];
        }

        // Check whether the last pooling window reaches beyond the padded input. If it does,
        // average pooling must compute its divisor dynamically, taking the padding into account.
        auto dynamic_mode = (((output_sizes.spatial[0] - 1) * stride.spatial[0]) + primitive->size.spatial[0]) > -2 * input_offset.spatial[0] + input_sizes.spatial[0] ||
            (((output_sizes.spatial[1] - 1) * stride.spatial[1]) + primitive->size.spatial[1]) > -2 * input_offset.spatial[1] + input_sizes.spatial[1];

        if (primitive->mode == pooling_mode::average && dynamic_mode)
            pp.divMode = kernel_selector::kernel_divider_mode::DYNAMIC_WITH_PADDING;
        else
            pp.divMode = cldnn_2_kernel_divider_mode(primitive->mode);

        const auto additional_offset = tensor::max(input_offset, 0);
        if (additional_offset != 0)
        {
            const auto& input_layout = arg.input().get_output_layout();
            pool_params.inputs[0] = convert_data_tensor(input_layout, 1, additional_offset);
        }

        if (primitive->mode == pooling_mode::max_with_argmax)
            pool_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout()));

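        // The kernel selector describes the pooling window, padding and stride as 2D (x, y) values.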
        pp.poolSize = {
            (uint32_t)primitive->size.spatial[0],
            (uint32_t)primitive->size.spatial[1],
        };

        pp.poolPad = {
            (uint32_t)std::max(-input_offset.spatial[0], 0),
            (uint32_t)std::max(-input_offset.spatial[1], 0)
        };

        pp.poolStride = {
            (uint32_t)stride.spatial[0],
            (uint32_t)stride.spatial[1]
        };

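        // Ask the pooling kernel selector for the best kernel matching these parameters;
        // fail if none of the available kernels supports them.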
        auto& kernel_selector   = kernel_selector::pooling_kernel_selector::Instance();
        auto best_kernels       = kernel_selector.GetBestKernels(pool_params, pool_optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "best_kernels.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");

        auto pool = new pooling_gpu(arg, best_kernels[0]);

        return pool;
    }
};

namespace {
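    // Static registration: binds the GPU pooling implementation to every supported
    // (engine, data type, layout) combination.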
    struct attach {
        attach()
        {
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), pooling_gpu::create);
            // MMAD
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), pooling_gpu::create);
            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), pooling_gpu::create);
        }
        ~attach() {}
    };
    attach attach_impl;
}
} }