Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / kernel_selector_helper.cpp
1 // Copyright (c) 2016-2019 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "kernel_selector_helper.h"
16 #include "kernel_selector_params.h"
17
18 #include "gpu/ocl_toolkit.h"
19
20 #include "program_node.h"
21 #include "program_impl.h"
22
23 #include "training_params.h"
24
25 kernel_selector::data_type to_data_type(data_types dt)
26 {
27     switch (dt)
28     {
29     case cldnn::data_types::i8:     return kernel_selector::data_type::INT8;
30     case cldnn::data_types::u8:     return kernel_selector::data_type::UINT8;
31     case cldnn::data_types::i32:     return kernel_selector::data_type::INT32;
32     case cldnn::data_types::i64:     return kernel_selector::data_type::INT64;
33     case cldnn::data_types::f16:    return kernel_selector::data_type::F16;
34     case cldnn::data_types::f32:    return kernel_selector::data_type::F32;
35     default:
36         assert(0);
37         return kernel_selector::data_type::F16;
38     }
39 }
40
41 data_types from_data_type(kernel_selector::data_type dt)
42 {
43     switch (dt)
44     {
45     case kernel_selector::data_type::INT8:   return cldnn::data_types::i8;
46     case kernel_selector::data_type::UINT8:   return cldnn::data_types::u8;
47     case kernel_selector::data_type::INT32:   return cldnn::data_types::i32;
48     case kernel_selector::data_type::INT64:   return cldnn::data_types::i64;
49     case kernel_selector::data_type::F16:    return cldnn::data_types::f16;
50     case kernel_selector::data_type::F32:    return cldnn::data_types::f32;
51     default:
52         assert(0);
53         return cldnn::data_types::f16;
54     }
55 }
56
57 kernel_selector::weights_type to_weights_type(data_types dt)
58 {
59     switch (dt)
60     {
61     case cldnn::data_types::i8:     return kernel_selector::weights_type::INT8;
62     case cldnn::data_types::u8:     return kernel_selector::weights_type::UINT8;
63     case cldnn::data_types::f16:    return kernel_selector::weights_type::F16;
64     case cldnn::data_types::f32:    return kernel_selector::weights_type::F32;
65     default:
66         assert(0);
67         return kernel_selector::weights_type::F16;
68     }
69 }
70
71 data_types from_weights_type(kernel_selector::weights_type dt)
72 {
73     switch (dt)
74     {
75     case kernel_selector::weights_type::INT8:   return data_types::i8;
76     case kernel_selector::weights_type::UINT8:  return data_types::u8;
77     case kernel_selector::weights_type::F16:    return data_types::f16;
78     case kernel_selector::weights_type::F32:    return data_types::f32;
79     default:
80         assert(0);
81         return data_types::f16;;
82     }
83 }
84
85 kernel_selector::data_layout to_data_layout(format f)
86 {
87     switch (f)
88     {
89     case format::bfyx:              return kernel_selector::data_layout::bfyx;
90     case format::yxfb:              return kernel_selector::data_layout::yxfb;
91     case format::byxf:              return kernel_selector::data_layout::byxf;
92     case format::fyxb:              return kernel_selector::data_layout::fyxb;
93     case format::bs_x_bsv16:        return kernel_selector::data_layout::bs_f_bsv16__af8;
94     case format::bs_xs_xsv8_bsv8:   return kernel_selector::data_layout::bs_f_bsv8__af8;
95     case format::bs_xs_xsv8_bsv16:  return kernel_selector::data_layout::bs_f_bsv16__af8;
96     case format::bf8_xy16:          return kernel_selector::data_layout::bf8_xy16;
97     case format::winograd_2x3_s1_data:  return kernel_selector::data_layout::winograd_2x3_s1_data;
98     case format::byxf_af32: return kernel_selector::data_layout::byxf_af32;
99     case format::byx8_f4: return kernel_selector::data_layout::byx8_f4;
100     case format::fs_bs_yx_bsv4_fsv32: return kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32;
101         //     case format::brfyx:          return kernel_selector::data_layout::brfyx;
102     case format::b_fs_yx_fsv4:       return kernel_selector::data_layout::b_fs_yx_fsv4;
103     default:
104         return kernel_selector::data_layout::bfyx;
105     }
106 }
107
108 cldnn::format from_data_layout(kernel_selector::data_layout l)
109 {
110     switch (l)
111     {
112     case kernel_selector::data_layout::bf:                return cldnn::format::bfyx;
113     case kernel_selector::data_layout::fb:                return cldnn::format::fyxb;
114     case kernel_selector::data_layout::bfyx:              return cldnn::format::bfyx;
115     case kernel_selector::data_layout::yxfb:              return cldnn::format::yxfb;
116     case kernel_selector::data_layout::byxf:              return cldnn::format::byxf;
117     case kernel_selector::data_layout::fyxb:              return cldnn::format::fyxb;
118     case kernel_selector::data_layout::bs_f_bsv8__af8:    return cldnn::format::bs_xs_xsv8_bsv8;
119     case kernel_selector::data_layout::bs_f_bsv16__af8:   return cldnn::format::bs_x_bsv16;
120     case kernel_selector::data_layout::bf8_xy16:          return cldnn::format::bf8_xy16;
121     case kernel_selector::data_layout::brfyx:             return cldnn::format::bfyx;
122     case kernel_selector::data_layout::winograd_2x3_s1_data:   return cldnn::format::winograd_2x3_s1_data;
123     case kernel_selector::data_layout::byxf_af32: return cldnn::format::byxf_af32;
124     case kernel_selector::data_layout::byx8_f4: return cldnn::format::byx8_f4;
125     case kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32: return cldnn::format::fs_bs_yx_bsv4_fsv32;
126     default:
127         return cldnn::format::bfyx;
128         break;
129     }
130 }
131
132 kernel_selector::weights_layout to_weights_layout(format f)
133 {
134     switch (f)
135     {
136     case format::bfyx:              return kernel_selector::weights_layout::oiyx;
137     case format::fyxb:              return kernel_selector::weights_layout::iyxo;
138     case format::byxf:              return kernel_selector::weights_layout::oyxi;
139     case format::yxfb:              return kernel_selector::weights_layout::yxio;
140     case format::os_iyx_osv16:      return kernel_selector::weights_layout::os_iyx_osv16;
141     case format::os_iyx_osv32:      return kernel_selector::weights_layout::os_iyx_osv32;
142     case format::os_iyx_osv64:      return kernel_selector::weights_layout::os_iyx_osv64;
143     case format::bs_xs_xsv8_bsv8:   return kernel_selector::weights_layout::os_i_osv8__ai8;
144     case format::bs_xs_xsv8_bsv16:  return kernel_selector::weights_layout::os_i_osv16__ai8;
145     case format::bs_x_bsv16:        return kernel_selector::weights_layout::os_i_osv16;
146     case format::image_2d_weights_c4_fyx_b:     return kernel_selector::weights_layout::image_2d_weights_c4_fyx_b;
147     case format::image_2d_weights_c1_b_fyx:     return kernel_selector::weights_layout::image_2d_weights_c1_b_fyx;
148     case format::winograd_2x3_s1_weights:       return kernel_selector::weights_layout::winograd_2x3_s1_weights;
149     case format::winograd_2x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_2x3_s1_fused_weights;
150     case format::winograd_6x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_6x3_s1_fused_weights;
151     case format::image_2d_weights_winograd_6x3_s1_fbxyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb;
152     case format::image_2d_weights_winograd_6x3_s1_xfbyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb;
153     case format::os_is_yx_isa8_osv8_isv4:                    return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
154     case format::os_is_yx_isa8_osv8_isv4_swizzled_by_4:      return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
155     case format::is_o_yx_isv32: return kernel_selector::weights_layout::is_o_yx_isv32;
156     case format::is_o32_yx_isv32_swizzled_by_4: return kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4;
157     case format::os_is_y_x8_osv8_isv4: return kernel_selector::weights_layout::os_is_y_x8_osv8_isv4;
158     case format::bf_lyx_yx:                                  return kernel_selector::weights_layout::bf_lyx_yx;
159     case format::os_is_yx_osv16_isv4:  return kernel_selector::weights_layout::os_is_yx_osv16_isv4;
160     default:
161         return kernel_selector::weights_layout::oi;
162     }
163 }
164
165 cldnn::format::type from_weights_layout(kernel_selector::weights_layout l)
166 {
167     switch (l)
168     {
169     case kernel_selector::weights_layout::oi:
170     case kernel_selector::weights_layout::oiyx:            return cldnn::format::bfyx;
171     case kernel_selector::weights_layout::oyxi:            return cldnn::format::byxf;
172     case kernel_selector::weights_layout::io:
173     case kernel_selector::weights_layout::iyxo:            return cldnn::format::fyxb;
174     case kernel_selector::weights_layout::yxio:            return cldnn::format::yxfb;
175     case kernel_selector::weights_layout::os_iyx_osv16:    return cldnn::format::os_iyx_osv16;
176     case kernel_selector::weights_layout::os_iyx_osv32:    return cldnn::format::os_iyx_osv32;
177     case kernel_selector::weights_layout::os_iyx_osv64:    return cldnn::format::os_iyx_osv64;
178     case kernel_selector::weights_layout::os_i_osv16:      return cldnn::format::bs_x_bsv16;
179     case kernel_selector::weights_layout::os_i_osv8__ai8:  return cldnn::format::bs_xs_xsv8_bsv8;
180     case kernel_selector::weights_layout::os_i_osv16__ai8: return cldnn::format::bs_xs_xsv8_bsv16;
181     case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b:     return cldnn::format::image_2d_weights_c4_fyx_b;
182     case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx:     return cldnn::format::image_2d_weights_c1_b_fyx;
183     case kernel_selector::weights_layout::winograd_2x3_s1_weights:       return cldnn::format::winograd_2x3_s1_weights;
184     case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights: return cldnn::format::winograd_2x3_s1_fused_weights;
185     case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights: return cldnn::format::winograd_6x3_s1_fused_weights;
186     case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
187     case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
188     case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4:                return cldnn::format::os_is_yx_isa8_osv8_isv4;
189     case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4:  return cldnn::format::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
190     case kernel_selector::weights_layout::is_o_yx_isv32:                          return cldnn::format::is_o_yx_isv32;
191     case kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4: return cldnn::format::is_o32_yx_isv32_swizzled_by_4;
192     case kernel_selector::weights_layout::os_is_y_x8_osv8_isv4: return cldnn::format::os_is_y_x8_osv8_isv4;
193     case kernel_selector::weights_layout::bf_lyx_yx:                              return cldnn::format::bf_lyx_yx;
194     default:
195         return cldnn::format::bfyx;
196     }
197 }
198
199 kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode)
200 {
201     switch (mode)
202     {
203     case cldnn::tuning_mode::tuning_disabled:         return kernel_selector::tuning_mode::TUNING_DISABLED;
204     case cldnn::tuning_mode::tuning_use_cache:        return kernel_selector::tuning_mode::TUNING_USE_CACHE;
205     case cldnn::tuning_mode::tuning_tune_and_cache:   return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
206     default:
207         return kernel_selector::tuning_mode::TUNING_DISABLED;
208     }
209 }
210
211 std::string to_host_version(const cldnn::version_t& version)
212 {
213     std::stringstream ss;
214     ss << version.major << "." << version.minor << "." << version.build << "." << version.revision;
215     return ss.str();
216 }
217
218 kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split, const tensor view_offset)
219 {
220     const auto& pad = l.data_padding;
221     const auto& vals = l.size.sizes(l.format);
222     const auto& add_offsets = view_offset.sizes(l.format);
223     const auto& lower_pad = pad.lower_size().sizes(l.format);
224     const auto& upper_pad = pad.upper_size().sizes(l.format);
225     const auto ks_layout = to_data_layout(l.format);
226     kernel_selector::n_dims vec(kernel_selector::DataTensor::ChannelsCount(ks_layout));
227
228     size_t pitch = 1;
229     size_t offset = 0;
230
231     auto new_vals = vals;
232
233     if (ks_layout == kernel_selector::Tensor::byxf_af32)
234     {
235         new_vals[3] = align_to(vals[3], 32);
236     }
237     if (ks_layout == kernel_selector::Tensor::fs_bs_yx_bsv4_fsv32)
238     {
239         new_vals[3] = align_to(vals[3], 32);
240         new_vals[2] = align_to(vals[2], 4);
241     }
242     if (ks_layout == kernel_selector::Tensor::byx8_f4)
243     {
244         new_vals[3] = align_to(vals[3], 4);
245         new_vals[2] = align_to(vals[2], 8);
246     }
247
248     for (size_t i = 0; i < vec.size(); i++)
249     {
250         const size_t tensor_index = vec.size() - 1 - i;
251         const auto d = vals[tensor_index];
252         const auto lp = lower_pad[tensor_index];
253         const auto up = upper_pad[tensor_index];
254         // tells us how many elements are reserved in memory for this tensor index
255         const auto reserved_in_mem_count = new_vals[tensor_index];
256
257         auto& elm = vec[i];
258         elm.v = static_cast<size_t>(d - add_offsets[tensor_index]);
259         elm.pitch = pitch;
260         elm.pad.before = lp;
261         elm.pad.after = up;
262
263         offset += pitch * (add_offsets[tensor_index]);
264         pitch *= (reserved_in_mem_count + lp + up);
265     }
266
267     const int feature_index = kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
268     vec[feature_index].v /= split;
269
270     return kernel_selector::data_tensor(
271         vec,
272         to_data_type(l.data_type),
273         ks_layout,
274         offset);
275 }
276
277 kernel_selector::weights_tensor convert_weights_tensor(const layout& l)
278 {
279     const auto& t = l.size.sizes(l.format);
280     const auto base_layout = to_weights_layout(l.format);
281     const auto ks_type = to_weights_type(l.data_type);
282     const auto ks_layout = to_weights_layout(l.format);
283     std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(base_layout));
284
285     for (size_t i = 0; i < vec.size(); i++)
286     {
287         const size_t tensor_index = t.size() - 1 - i;
288         const auto d = t[tensor_index];
289         vec[i] = static_cast<size_t>(d);
290     }
291
292     return kernel_selector::weights_tensor(
293         vec,
294         ks_type,
295         base_layout).TransformIgnorePadding(ks_layout);
296 }
297
298 kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func)
299 {
300     switch (activation_func)
301     {
302     case activation_none:
303         return kernel_selector::activation_function::NONE;
304     case activation_logistic:
305         return kernel_selector::activation_function::LOGISTIC;
306     case activation_hyperbolic_tan:
307         return kernel_selector::activation_function::HYPERBOLIC_TAN;
308     case activation_relu:
309         return kernel_selector::activation_function::RELU;
310     case activation_relu_negative_slope:
311         return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
312     case activation_clamp:
313         return kernel_selector::activation_function::CLAMP;
314     case activation_softrelu:
315         return kernel_selector::activation_function::SOFTRELU;
316     case activation_abs:
317         return kernel_selector::activation_function::ABS;
318     case activation_linear:
319         return kernel_selector::activation_function::LINEAR;
320     case activation_square:
321         return kernel_selector::activation_function::SQUARE;
322     case activation_sqrt:
323         return kernel_selector::activation_function::SQRT;
324     case activation_elu:
325         return kernel_selector::activation_function::ELU;
326     case activation_sin:
327         return kernel_selector::activation_function::SIN;
328     case activation_asin:
329         return kernel_selector::activation_function::ASIN;
330     case activation_sinh:
331         return kernel_selector::activation_function::SINH;
332     case activation_cos:
333         return kernel_selector::activation_function::COS;
334     case activation_acos:
335         return kernel_selector::activation_function::ACOS;
336     case activation_cosh:
337         return kernel_selector::activation_function::COSH;
338     case activation_log:
339         return kernel_selector::activation_function::LOG;
340     case activation_log2:
341         return kernel_selector::activation_function::LOG2;
342     case activation_exp:
343         return kernel_selector::activation_function::EXP;
344     case activation_not:
345         return kernel_selector::activation_function::NOT;
346     default:
347         throw std::runtime_error("Unknown activation function");
348         break;
349     }
350 }
351
352 kernel_selector::activation_function get_kernel_selector_activation_grad_param(cldnn_activation_grad_func activation_grad_func)
353 {
354     switch (activation_grad_func)
355     {
356     case activation_grad_none:
357         return kernel_selector::activation_function::NONE_GRAD;
358     case activation_grad_relu:
359         return kernel_selector::activation_function::RELU_GRAD;
360     case activation_grad_relu_negative_slope:
361         return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE_GRAD;
362     default:
363         throw std::runtime_error("Unknown activation_grad function");
364         break;
365     }
366 }
367
368 void set_params(const program_node& node, kernel_selector::params& params)
369 {
370     const auto& context = node.get_program().get_engine().get_context();
371     const auto& engine_info = context->get_engine_info();
372
373     params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups");
374     params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
375     params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16");
376     params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64");
377     params.engineInfo.bIMADSupport = engine_info.supports_imad != 0;
378     params.engineInfo.bIMMADSupport = engine_info.supports_immad != 0;
379     params.engineInfo.bImageSupport = engine_info.supports_image != 0;
380     params.engineInfo.maxWorkGroupSize = engine_info.max_work_group_size;
381     params.engineInfo.maxLocalMemSize = engine_info.max_local_mem_size;
382     params.engineInfo.maxImage2dWidth = engine_info.max_image2d_width;
383     params.engineInfo.maxImage2dHeight = engine_info.max_image2d_height;
384     params.engineInfo.deviceId = engine_info.dev_id;
385     params.engineInfo.computeUnitsCount = engine_info.compute_units_count;
386     params.engineInfo.deviceCache = engine_info.device_cache;
387     params.engineInfo.driverVersion = engine_info.driver_version;
388     params.engineInfo.hostVersion = to_host_version(cldnn::get_version());
389 }
390
391 void set_learning_params(const program_node& node, kernel_selector::training_params& params, bool use_momentum)
392 {
393     const auto learning_params = node.get_program().get_options().template get<build_option_type::learning_config>()->params;
394
395     if (use_momentum)
396     {
397         params.use_momentum = true;
398     }
399
400     params.momentum_factor = learning_params.momentum;
401     params.weights_decay = learning_params.weights_decay;
402 }
403
404 void set_optional_params(const program_impl& program, kernel_selector::optional_params& params)
405 {
406     const auto& context = program.get_engine().get_context();
407
408     params.meaningfulKernelsNames = context->get_configuration().meaningful_kernels_names;
409     params.allowStaticInputReordering = program.get_options().get<build_option_type::optimize_data>()->enabled();
410     params.allowInputReordering = false;
411     params.allowOutputReordering = false;
412
413     const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
414     params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
415     params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
416 }