1 // Copyright (c) 2016-2019 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "kernel_selector_helper.h"
16 #include "kernel_selector_params.h"
18 #include "gpu/ocl_toolkit.h"
20 #include "program_node.h"
21 #include "program_impl.h"
23 #include "training_params.h"
25 kernel_selector::data_type to_data_type(data_types dt)
29 case cldnn::data_types::i8: return kernel_selector::data_type::INT8;
30 case cldnn::data_types::u8: return kernel_selector::data_type::UINT8;
31 case cldnn::data_types::i32: return kernel_selector::data_type::INT32;
32 case cldnn::data_types::i64: return kernel_selector::data_type::INT64;
33 case cldnn::data_types::f16: return kernel_selector::data_type::F16;
34 case cldnn::data_types::f32: return kernel_selector::data_type::F32;
37 return kernel_selector::data_type::F16;
41 data_types from_data_type(kernel_selector::data_type dt)
45 case kernel_selector::data_type::INT8: return cldnn::data_types::i8;
46 case kernel_selector::data_type::UINT8: return cldnn::data_types::u8;
47 case kernel_selector::data_type::INT32: return cldnn::data_types::i32;
48 case kernel_selector::data_type::INT64: return cldnn::data_types::i64;
49 case kernel_selector::data_type::F16: return cldnn::data_types::f16;
50 case kernel_selector::data_type::F32: return cldnn::data_types::f32;
53 return cldnn::data_types::f16;
57 kernel_selector::weights_type to_weights_type(data_types dt)
61 case cldnn::data_types::i8: return kernel_selector::weights_type::INT8;
62 case cldnn::data_types::u8: return kernel_selector::weights_type::UINT8;
63 case cldnn::data_types::f16: return kernel_selector::weights_type::F16;
64 case cldnn::data_types::f32: return kernel_selector::weights_type::F32;
67 return kernel_selector::weights_type::F16;
71 data_types from_weights_type(kernel_selector::weights_type dt)
75 case kernel_selector::weights_type::INT8: return data_types::i8;
76 case kernel_selector::weights_type::UINT8: return data_types::u8;
77 case kernel_selector::weights_type::F16: return data_types::f16;
78 case kernel_selector::weights_type::F32: return data_types::f32;
81 return data_types::f16;;
85 kernel_selector::data_layout to_data_layout(format f)
89 case format::bfyx: return kernel_selector::data_layout::bfyx;
90 case format::yxfb: return kernel_selector::data_layout::yxfb;
91 case format::byxf: return kernel_selector::data_layout::byxf;
92 case format::fyxb: return kernel_selector::data_layout::fyxb;
93 case format::bs_x_bsv16: return kernel_selector::data_layout::bs_f_bsv16__af8;
94 case format::bs_xs_xsv8_bsv8: return kernel_selector::data_layout::bs_f_bsv8__af8;
95 case format::bs_xs_xsv8_bsv16: return kernel_selector::data_layout::bs_f_bsv16__af8;
96 case format::bf8_xy16: return kernel_selector::data_layout::bf8_xy16;
97 case format::winograd_2x3_s1_data: return kernel_selector::data_layout::winograd_2x3_s1_data;
98 case format::byxf_af32: return kernel_selector::data_layout::byxf_af32;
99 case format::byx8_f4: return kernel_selector::data_layout::byx8_f4;
100 case format::fs_bs_yx_bsv4_fsv32: return kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32;
101 // case format::brfyx: return kernel_selector::data_layout::brfyx;
102 case format::b_fs_yx_fsv4: return kernel_selector::data_layout::b_fs_yx_fsv4;
104 return kernel_selector::data_layout::bfyx;
108 cldnn::format from_data_layout(kernel_selector::data_layout l)
112 case kernel_selector::data_layout::bf: return cldnn::format::bfyx;
113 case kernel_selector::data_layout::fb: return cldnn::format::fyxb;
114 case kernel_selector::data_layout::bfyx: return cldnn::format::bfyx;
115 case kernel_selector::data_layout::yxfb: return cldnn::format::yxfb;
116 case kernel_selector::data_layout::byxf: return cldnn::format::byxf;
117 case kernel_selector::data_layout::fyxb: return cldnn::format::fyxb;
118 case kernel_selector::data_layout::bs_f_bsv8__af8: return cldnn::format::bs_xs_xsv8_bsv8;
119 case kernel_selector::data_layout::bs_f_bsv16__af8: return cldnn::format::bs_x_bsv16;
120 case kernel_selector::data_layout::bf8_xy16: return cldnn::format::bf8_xy16;
121 case kernel_selector::data_layout::brfyx: return cldnn::format::bfyx;
122 case kernel_selector::data_layout::winograd_2x3_s1_data: return cldnn::format::winograd_2x3_s1_data;
123 case kernel_selector::data_layout::byxf_af32: return cldnn::format::byxf_af32;
124 case kernel_selector::data_layout::byx8_f4: return cldnn::format::byx8_f4;
125 case kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32: return cldnn::format::fs_bs_yx_bsv4_fsv32;
127 return cldnn::format::bfyx;
132 kernel_selector::weights_layout to_weights_layout(format f)
136 case format::bfyx: return kernel_selector::weights_layout::oiyx;
137 case format::fyxb: return kernel_selector::weights_layout::iyxo;
138 case format::byxf: return kernel_selector::weights_layout::oyxi;
139 case format::yxfb: return kernel_selector::weights_layout::yxio;
140 case format::os_iyx_osv16: return kernel_selector::weights_layout::os_iyx_osv16;
141 case format::os_iyx_osv32: return kernel_selector::weights_layout::os_iyx_osv32;
142 case format::os_iyx_osv64: return kernel_selector::weights_layout::os_iyx_osv64;
143 case format::bs_xs_xsv8_bsv8: return kernel_selector::weights_layout::os_i_osv8__ai8;
144 case format::bs_xs_xsv8_bsv16: return kernel_selector::weights_layout::os_i_osv16__ai8;
145 case format::bs_x_bsv16: return kernel_selector::weights_layout::os_i_osv16;
146 case format::image_2d_weights_c4_fyx_b: return kernel_selector::weights_layout::image_2d_weights_c4_fyx_b;
147 case format::image_2d_weights_c1_b_fyx: return kernel_selector::weights_layout::image_2d_weights_c1_b_fyx;
148 case format::winograd_2x3_s1_weights: return kernel_selector::weights_layout::winograd_2x3_s1_weights;
149 case format::winograd_2x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_2x3_s1_fused_weights;
150 case format::winograd_6x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_6x3_s1_fused_weights;
151 case format::image_2d_weights_winograd_6x3_s1_fbxyb: return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb;
152 case format::image_2d_weights_winograd_6x3_s1_xfbyb: return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb;
153 case format::os_is_yx_isa8_osv8_isv4: return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
154 case format::os_is_yx_isa8_osv8_isv4_swizzled_by_4: return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
155 case format::is_o_yx_isv32: return kernel_selector::weights_layout::is_o_yx_isv32;
156 case format::is_o32_yx_isv32_swizzled_by_4: return kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4;
157 case format::os_is_y_x8_osv8_isv4: return kernel_selector::weights_layout::os_is_y_x8_osv8_isv4;
158 case format::bf_lyx_yx: return kernel_selector::weights_layout::bf_lyx_yx;
159 case format::os_is_yx_osv16_isv4: return kernel_selector::weights_layout::os_is_yx_osv16_isv4;
161 return kernel_selector::weights_layout::oi;
165 cldnn::format::type from_weights_layout(kernel_selector::weights_layout l)
169 case kernel_selector::weights_layout::oi:
170 case kernel_selector::weights_layout::oiyx: return cldnn::format::bfyx;
171 case kernel_selector::weights_layout::oyxi: return cldnn::format::byxf;
172 case kernel_selector::weights_layout::io:
173 case kernel_selector::weights_layout::iyxo: return cldnn::format::fyxb;
174 case kernel_selector::weights_layout::yxio: return cldnn::format::yxfb;
175 case kernel_selector::weights_layout::os_iyx_osv16: return cldnn::format::os_iyx_osv16;
176 case kernel_selector::weights_layout::os_iyx_osv32: return cldnn::format::os_iyx_osv32;
177 case kernel_selector::weights_layout::os_iyx_osv64: return cldnn::format::os_iyx_osv64;
178 case kernel_selector::weights_layout::os_i_osv16: return cldnn::format::bs_x_bsv16;
179 case kernel_selector::weights_layout::os_i_osv8__ai8: return cldnn::format::bs_xs_xsv8_bsv8;
180 case kernel_selector::weights_layout::os_i_osv16__ai8: return cldnn::format::bs_xs_xsv8_bsv16;
181 case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b: return cldnn::format::image_2d_weights_c4_fyx_b;
182 case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx: return cldnn::format::image_2d_weights_c1_b_fyx;
183 case kernel_selector::weights_layout::winograd_2x3_s1_weights: return cldnn::format::winograd_2x3_s1_weights;
184 case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights: return cldnn::format::winograd_2x3_s1_fused_weights;
185 case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights: return cldnn::format::winograd_6x3_s1_fused_weights;
186 case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
187 case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
188 case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4: return cldnn::format::os_is_yx_isa8_osv8_isv4;
189 case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4: return cldnn::format::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
190 case kernel_selector::weights_layout::is_o_yx_isv32: return cldnn::format::is_o_yx_isv32;
191 case kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4: return cldnn::format::is_o32_yx_isv32_swizzled_by_4;
192 case kernel_selector::weights_layout::os_is_y_x8_osv8_isv4: return cldnn::format::os_is_y_x8_osv8_isv4;
193 case kernel_selector::weights_layout::bf_lyx_yx: return cldnn::format::bf_lyx_yx;
195 return cldnn::format::bfyx;
199 kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode)
203 case cldnn::tuning_mode::tuning_disabled: return kernel_selector::tuning_mode::TUNING_DISABLED;
204 case cldnn::tuning_mode::tuning_use_cache: return kernel_selector::tuning_mode::TUNING_USE_CACHE;
205 case cldnn::tuning_mode::tuning_tune_and_cache: return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
207 return kernel_selector::tuning_mode::TUNING_DISABLED;
211 std::string to_host_version(const cldnn::version_t& version)
213 std::stringstream ss;
214 ss << version.major << "." << version.minor << "." << version.build << "." << version.revision;
// Describes layout `l` as a kernel_selector::data_tensor.
//
//   l           - layout to describe; its format, size, padding and data type are read.
//   split       - divisor applied to the extent of the feature channel at the end.
//   view_offset - per-dimension offset; subtracted from each extent and folded into
//                 the running `offset`.
//
// NOTE(review): `pitch`, `offset` and `elm` are used below but their declarations are
// not among the lines visible here; presumably pitch starts at 1, offset at 0 and elm
// refers to vec[i] - confirm.
// NOTE(review): the feature extent is divided by `split` without a zero check; callers
// presumably always pass a value >= 1 - confirm.
218 kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split, const tensor view_offset)
// Every per-dimension vector below is requested in the order of l.format, so one
// index addresses the same dimension in all of them.
220 const auto& pad = l.data_padding;
221 const auto& vals = l.size.sizes(l.format);
222 const auto& add_offsets = view_offset.sizes(l.format);
223 const auto& lower_pad = pad.lower_size().sizes(l.format);
224 const auto& upper_pad = pad.upper_size().sizes(l.format);
225 const auto ks_layout = to_data_layout(l.format);
// One entry per channel of the kernel selector layout.
226 kernel_selector::n_dims vec(kernel_selector::DataTensor::ChannelsCount(ks_layout));
// new_vals starts as a copy of the logical sizes; for the three layouts below,
// entries 3 and 2 are replaced by the result of align_to (presumably a round-up to a
// multiple of the second argument - confirm).
231 auto new_vals = vals;
233 if (ks_layout == kernel_selector::Tensor::byxf_af32)
235 new_vals[3] = align_to(vals[3], 32);
237 if (ks_layout == kernel_selector::Tensor::fs_bs_yx_bsv4_fsv32)
239 new_vals[3] = align_to(vals[3], 32);
240 new_vals[2] = align_to(vals[2], 4);
242 if (ks_layout == kernel_selector::Tensor::byx8_f4)
244 new_vals[3] = align_to(vals[3], 4);
245 new_vals[2] = align_to(vals[2], 8);
// Walk the size vectors from index vec.size() - 1 down to 0.
248 for (size_t i = 0; i < vec.size(); i++)
250 const size_t tensor_index = vec.size() - 1 - i;
// d is the logical (unaligned) size; lp / up are the paddings before and after it.
251 const auto d = vals[tensor_index];
252 const auto lp = lower_pad[tensor_index];
253 const auto up = upper_pad[tensor_index];
254 // tells us how many elements are reserved in memory for this tensor index
255 const auto reserved_in_mem_count = new_vals[tensor_index];
// The extent uses the logical size minus the view offset ...
258 elm.v = static_cast<size_t>(d - add_offsets[tensor_index]);
// ... while the offset advances by the view offset scaled with the current pitch, and
// the pitch grows by the aligned size plus both paddings.
263 offset += pitch * (add_offsets[tensor_index]);
264 pitch *= (reserved_in_mem_count + lp + up);
// Shrink the feature channel's extent by the split factor (integer division).
267 const int feature_index = kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
268 vec[feature_index].v /= split;
// Only the data-type argument of the constructor call is visible in this listing.
270 return kernel_selector::data_tensor(
272 to_data_type(l.data_type),
// Describes layout `l` as a kernel_selector::weights_tensor.
//
//   l - layout to describe; its format, size and data type are read.
277 kernel_selector::weights_tensor convert_weights_tensor(const layout& l)
// Sizes in the order of l.format.
279 const auto& t = l.size.sizes(l.format);
// base_layout and ks_layout come from the same call with the same argument, so they
// hold the same value here.
280 const auto base_layout = to_weights_layout(l.format);
281 const auto ks_type = to_weights_type(l.data_type);
282 const auto ks_layout = to_weights_layout(l.format);
// One entry per channel of the base layout.
283 std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(base_layout));
// Copy the sizes in reverse order: vec[i] receives t[t.size() - 1 - i].
// NOTE(review): the bound is vec.size() while the index is derived from t.size();
// this assumes t has at least vec.size() entries - confirm.
285 for (size_t i = 0; i < vec.size(); i++)
287 const size_t tensor_index = t.size() - 1 - i;
288 const auto d = t[tensor_index];
289 vec[i] = static_cast<size_t>(d);
// The tensor is constructed with base_layout as its last visible argument (the other
// arguments are not visible in this listing) and then passed through
// TransformIgnorePadding(ks_layout).
292 return kernel_selector::weights_tensor(
295 base_layout).TransformIgnorePadding(ks_layout);
// Translates a cldnn_activation_func value into the kernel selector's
// activation_function. Throws std::runtime_error("Unknown activation function") on
// the path that ends the switch.
//
// NOTE(review): in this listing the returns for ABS, ELU, SIN, COS, LOG, EXP and NOT,
// and the final throw, have no label of their own directly above them. Presumably
// each follows a case label named after the returned value (and the throw follows
// `default:`) - confirm against the cldnn_activation_func enum.
298 kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func)
300 switch (activation_func)
302 case activation_none:
303 return kernel_selector::activation_function::NONE;
304 case activation_logistic:
305 return kernel_selector::activation_function::LOGISTIC;
306 case activation_hyperbolic_tan:
307 return kernel_selector::activation_function::HYPERBOLIC_TAN;
308 case activation_relu:
309 return kernel_selector::activation_function::RELU;
310 case activation_relu_negative_slope:
311 return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
312 case activation_clamp:
313 return kernel_selector::activation_function::CLAMP;
314 case activation_softrelu:
315 return kernel_selector::activation_function::SOFTRELU;
// No label visible for this return (see note above).
317 return kernel_selector::activation_function::ABS;
318 case activation_linear:
319 return kernel_selector::activation_function::LINEAR;
320 case activation_square:
321 return kernel_selector::activation_function::SQUARE;
322 case activation_sqrt:
323 return kernel_selector::activation_function::SQRT;
// No labels visible for the next two returns (see note above).
325 return kernel_selector::activation_function::ELU;
327 return kernel_selector::activation_function::SIN;
328 case activation_asin:
329 return kernel_selector::activation_function::ASIN;
330 case activation_sinh:
331 return kernel_selector::activation_function::SINH;
// No label visible for this return (see note above).
333 return kernel_selector::activation_function::COS;
334 case activation_acos:
335 return kernel_selector::activation_function::ACOS;
336 case activation_cosh:
337 return kernel_selector::activation_function::COSH;
// No label visible for this return (see note above).
339 return kernel_selector::activation_function::LOG;
340 case activation_log2:
341 return kernel_selector::activation_function::LOG2;
// No labels visible for the next two returns (see note above).
343 return kernel_selector::activation_function::EXP;
345 return kernel_selector::activation_function::NOT;
// Reached when none of the labels above matched.
347 throw std::runtime_error("Unknown activation function");
352 kernel_selector::activation_function get_kernel_selector_activation_grad_param(cldnn_activation_grad_func activation_grad_func)
354 switch (activation_grad_func)
356 case activation_grad_none:
357 return kernel_selector::activation_function::NONE_GRAD;
358 case activation_grad_relu:
359 return kernel_selector::activation_function::RELU_GRAD;
360 case activation_grad_relu_negative_slope:
361 return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE_GRAD;
363 throw std::runtime_error("Unknown activation_grad function");
368 void set_params(const program_node& node, kernel_selector::params& params)
370 const auto& context = node.get_program().get_engine().get_context();
371 const auto& engine_info = context->get_engine_info();
373 params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups");
374 params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
375 params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16");
376 params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64");
377 params.engineInfo.bIMADSupport = engine_info.supports_imad != 0;
378 params.engineInfo.bIMMADSupport = engine_info.supports_immad != 0;
379 params.engineInfo.bImageSupport = engine_info.supports_image != 0;
380 params.engineInfo.maxWorkGroupSize = engine_info.max_work_group_size;
381 params.engineInfo.maxLocalMemSize = engine_info.max_local_mem_size;
382 params.engineInfo.maxImage2dWidth = engine_info.max_image2d_width;
383 params.engineInfo.maxImage2dHeight = engine_info.max_image2d_height;
384 params.engineInfo.deviceId = engine_info.dev_id;
385 params.engineInfo.computeUnitsCount = engine_info.compute_units_count;
386 params.engineInfo.deviceCache = engine_info.device_cache;
387 params.engineInfo.driverVersion = engine_info.driver_version;
388 params.engineInfo.hostVersion = to_host_version(cldnn::get_version());
// Copies the learning configuration of `node`'s program into `params`.
//
//   node         - program node whose program options hold the learning_config.
//   params       - receives use_momentum, momentum_factor and weights_decay.
//   use_momentum - NOTE(review): not read in any line visible here; the assignment
//                  of params.use_momentum below is presumably guarded by it - confirm.
391 void set_learning_params(const program_node& node, kernel_selector::training_params& params, bool use_momentum)
// Take a copy of the `params` member of the learning_config build option.
393 const auto learning_params = node.get_program().get_options().template get<build_option_type::learning_config>()->params;
397 params.use_momentum = true;
// Momentum factor and weight decay come straight from the learning configuration.
400 params.momentum_factor = learning_params.momentum;
401 params.weights_decay = learning_params.weights_decay;
404 void set_optional_params(const program_impl& program, kernel_selector::optional_params& params)
406 const auto& context = program.get_engine().get_context();
408 params.meaningfulKernelsNames = context->get_configuration().meaningful_kernels_names;
409 params.allowStaticInputReordering = program.get_options().get<build_option_type::optimize_data>()->enabled();
410 params.allowInputReordering = false;
411 params.allowOutputReordering = false;
413 const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
414 params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
415 params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;