2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 #include "api/C/cldnn.h"
19 #include "api/CPP/program.hpp"
20 #include "program_impl.h"
21 #include "gpu/ocl_toolkit.h"
22 #include "tensor_type.h"
23 #include "kernel_selector_params.h"
24 #include "kernel_selector_common.h"
// Brings the cldnn namespace into scope for everything declared below.
// NOTE(review): later declarations use unqualified names such as data_types,
// format, layout and tensor, presumably resolved through this directive.
27 using namespace cldnn;
// Short lower-case aliases for kernel_selector types, declared inside the
// kernel_selector namespace itself.
29 namespace kernel_selector
// Dimension list, kernel descriptions, kernel argument/scalar descriptors
// and JIT constants.
31 using n_dims = kernel_selector::Tensor::NDims;
32 using kernel_data = kernel_selector::KernelData;
33 using kernel_string = kernel_selector::KernelString;
34 using cl_kernel_data = kernel_selector::clKernelData;
35 using kernel_arguments = kernel_selector::Arguments;
36 using kernel_argument_element = kernel_selector::ArgumentDescriptor;
37 using kernel_argument_types = kernel_selector::ArgumentDescriptor::Types;
38 using kernel_scalar_arguments = kernel_selector::Scalars;
39 using kernel_scalar_argument_types = kernel_selector::ScalarDescriptor::Types;
40 using jit_constants = kernel_selector::JitConstants;
// Element types, kernel kinds and per-primitive mode selectors.
42 using data_type = kernel_selector::Datatype;
43 using kernel_type = kernel_selector::KernelType;
44 using weights_type = kernel_selector::WeightsType;
45 using activation_function = kernel_selector::ActivationFunction;
46 using pool_type = kernel_selector::PoolType;
47 using pool_remainder = kernel_selector::PoolRemainder;
48 using argm_axis = kernel_selector::ArgMaxMinAxis;
49 using argm_output = kernel_selector::ArgMaxMinOut;
50 using lookt_axis = kernel_selector::LookUpTableAxis;
51 using lrn_mode = kernel_selector::LRNMode;
52 using normalize_mode = kernel_selector::NormalizeMode;
53 using mvn_mode = kernel_selector::MVNMode;
54 using kernel_divider_mode = kernel_selector::KernelDividerMode;
55 using eltwise_mode = kernel_selector::EltwiseMode;
56 using eltwise_input_mode = kernel_selector::EltwiseInputMode;
57 using softmax_dim = kernel_selector::SoftmaxDim;
// NOTE(review): "subtruct" looks like a misspelling of "subtract"; the alias
// name is public, so renaming it would affect every user of this header.
58 using mean_subtruct_mode = kernel_selector::MeanSubtractMode;
59 using mean_op = kernel_selector::MeanOp;
60 using concat_axis = kernel_selector::ConcatAxis;
61 using tuning_mode = kernel_selector::TuningMode;
62 using sample_type = kernel_selector::SampleType;
// Tensor descriptors and their layouts.
64 using data_tensor = kernel_selector::DataTensor;
65 using weights_tensor = kernel_selector::WeightsTensor;
66 using data_layout = kernel_selector::DataLayout;
67 using weights_layout = kernel_selector::WeightsLayout;
68 using multi_data_tensor = kernel_selector::MultiDataTensor;
// Parameter structures.
70 using params = kernel_selector::Params;
71 using weights_reorder_params = kernel_selector::WeightsReorderParams;
72 using generic_kernel_params = kernel_selector::GenericKernelParams;
75 inline kernel_selector::data_type to_data_type(data_types dt)
79 case cldnn::data_types::i8: return kernel_selector::data_type::INT8;
80 case cldnn::data_types::u8: return kernel_selector::data_type::UINT8;
81 case cldnn::data_types::f16: return kernel_selector::data_type::F16;
82 case cldnn::data_types::f32: return kernel_selector::data_type::F32;
85 return kernel_selector::data_type::F16;
89 inline data_types from_data_type(kernel_selector::data_type dt)
93 case kernel_selector::data_type::INT8: return cldnn::data_types::i8;
94 case kernel_selector::data_type::UINT8: return cldnn::data_types::u8;
95 case kernel_selector::data_type::F16: return cldnn::data_types::f16;
96 case kernel_selector::data_type::F32: return cldnn::data_types::f32;
99 return cldnn::data_types::f16;
103 inline kernel_selector::weights_type to_weights_type(data_types dt)
107 case cldnn::data_types::i8: return kernel_selector::weights_type::INT8;
108 case cldnn::data_types::f16: return kernel_selector::weights_type::F16;
109 case cldnn::data_types::f32: return kernel_selector::weights_type::F32;
112 return kernel_selector::weights_type::F16;
116 inline data_types from_weights_type(kernel_selector::weights_type dt)
120 case kernel_selector::weights_type::INT8: return data_types::i8;
121 case kernel_selector::weights_type::F16: return data_types::f16;
122 case kernel_selector::weights_type::F32: return data_types::f32;
125 return data_types::f16;;
129 inline kernel_selector::data_layout to_data_layout(format f)
133 case format::bfyx: return kernel_selector::data_layout::bfyx;
134 case format::yxfb: return kernel_selector::data_layout::yxfb;
135 case format::byxf: return kernel_selector::data_layout::byxf;
136 case format::fyxb: return kernel_selector::data_layout::fyxb;
137 case format::bs_x_bsv16: return kernel_selector::data_layout::bs_f_bsv16__af8;
138 case format::bs_xs_xsv8_bsv8: return kernel_selector::data_layout::bs_f_bsv8__af8;
139 case format::bs_xs_xsv8_bsv16: return kernel_selector::data_layout::bs_f_bsv16__af8;
140 case format::bf8_xy16: return kernel_selector::data_layout::bf8_xy16;
141 case format::winograd_2x3_s1_data: return kernel_selector::data_layout::winograd_2x3_s1_data;
142 case format::byxf_af32: return kernel_selector::data_layout::byxf_af32;
143 // case format::brfyx: return kernel_selector::data_layout::brfyx;
145 return kernel_selector::data_layout::bfyx;
149 static inline cldnn::format from_data_layout(kernel_selector::data_layout l)
153 case kernel_selector::data_layout::bf: return cldnn::format::bfyx;
154 case kernel_selector::data_layout::fb: return cldnn::format::fyxb;
155 case kernel_selector::data_layout::bfyx: return cldnn::format::bfyx;
156 case kernel_selector::data_layout::yxfb: return cldnn::format::yxfb;
157 case kernel_selector::data_layout::byxf: return cldnn::format::byxf;
158 case kernel_selector::data_layout::fyxb: return cldnn::format::fyxb;
159 case kernel_selector::data_layout::bs_f_bsv8__af8: return cldnn::format::bs_xs_xsv8_bsv8;
160 case kernel_selector::data_layout::bs_f_bsv16__af8: return cldnn::format::bs_x_bsv16;
161 case kernel_selector::data_layout::bf8_xy16: return cldnn::format::bf8_xy16;
162 case kernel_selector::data_layout::brfyx: return cldnn::format::bfyx;
163 case kernel_selector::data_layout::winograd_2x3_s1_data: return cldnn::format::winograd_2x3_s1_data;
164 case kernel_selector::data_layout::byxf_af32: return cldnn::format::byxf_af32;
166 return cldnn::format::bfyx;
171 inline kernel_selector::weights_layout to_weights_layout(format f)
175 case format::bfyx: return kernel_selector::weights_layout::oiyx;
176 case format::fyxb: return kernel_selector::weights_layout::iyxo;
177 case format::byxf: return kernel_selector::weights_layout::oyxi;
178 case format::yxfb: return kernel_selector::weights_layout::yxio;
179 case format::os_iyx_osv16: return kernel_selector::weights_layout::os_iyx_osv16;
180 case format::bs_xs_xsv8_bsv8: return kernel_selector::weights_layout::os_i_osv8__ai8;
181 case format::bs_xs_xsv8_bsv16: return kernel_selector::weights_layout::os_i_osv16__ai8;
182 case format::bs_x_bsv16: return kernel_selector::weights_layout::os_i_osv16;
183 case format::image_2d_weights_c4_fyx_b: return kernel_selector::weights_layout::image_2d_weights_c4_fyx_b;
184 case format::image_2d_weights_c1_b_fyx: return kernel_selector::weights_layout::image_2d_weights_c1_b_fyx;
185 case format::winograd_2x3_s1_weights: return kernel_selector::weights_layout::winograd_2x3_s1_weights;
186 case format::winograd_2x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_2x3_s1_fused_weights;
187 case format::winograd_6x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_6x3_s1_fused_weights;
188 case format::image_2d_weights_winograd_6x3_s1_fbxyb: return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb;
189 case format::image_2d_weights_winograd_6x3_s1_xfbyb: return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb;
190 case format::os_is_yx_isa8_osv8_isv4: return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
192 return kernel_selector::weights_layout::oi;
196 static inline cldnn::format::type from_weights_layout(kernel_selector::weights_layout l)
200 case kernel_selector::weights_layout::oi:
201 case kernel_selector::weights_layout::oiyx: return cldnn::format::bfyx;
202 case kernel_selector::weights_layout::oyxi: return cldnn::format::byxf;
203 case kernel_selector::weights_layout::io:
204 case kernel_selector::weights_layout::iyxo: return cldnn::format::fyxb;
205 case kernel_selector::weights_layout::yxio: return cldnn::format::yxfb;
206 case kernel_selector::weights_layout::os_iyx_osv16: return cldnn::format::os_iyx_osv16;
207 case kernel_selector::weights_layout::os_i_osv16: return cldnn::format::bs_x_bsv16;
208 case kernel_selector::weights_layout::os_i_osv8__ai8: return cldnn::format::bs_xs_xsv8_bsv8;
209 case kernel_selector::weights_layout::os_i_osv16__ai8: return cldnn::format::bs_xs_xsv8_bsv16;
210 case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b: return cldnn::format::image_2d_weights_c4_fyx_b;
211 case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx: return cldnn::format::image_2d_weights_c1_b_fyx;
212 case kernel_selector::weights_layout::winograd_2x3_s1_weights: return cldnn::format::winograd_2x3_s1_weights;
213 case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights: return cldnn::format::winograd_2x3_s1_fused_weights;
214 case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights: return cldnn::format::winograd_6x3_s1_fused_weights;
215 case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
216 case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
217 case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4: return cldnn::format::os_is_yx_isa8_osv8_isv4;
219 return cldnn::format::bfyx;
223 inline kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode)
227 case cldnn::tuning_mode::tuning_disabled: return kernel_selector::tuning_mode::TUNING_DISABLED;
228 case cldnn::tuning_mode::tuning_use_cache: return kernel_selector::tuning_mode::TUNING_USE_CACHE;
229 case cldnn::tuning_mode::tuning_tune_and_cache: return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
231 return kernel_selector::tuning_mode::TUNING_DISABLED;
235 inline std::string to_host_version(const cldnn::version_t& version)
237 std::stringstream ss;
238 ss << version.major << "." << version.minor << "." << version.build << "." << version.revision;
// Converts a cldnn layout into a kernel_selector data tensor.
//   l           - source layout; its size, format, data type and padding are read.
//   split       - divisor applied to the extent of the feature dimension.
//   view_offset - per-dimension offset; it is subtracted from each extent and
//                 accumulated into the tensor offset.
// The element type and layout of the result come from to_data_type() and
// to_data_layout().
242 inline kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split = 1, const tensor view_offset = {})
// All per-dimension vectors below are queried in the order of l.format.
244 const auto& pad = l.data_padding;
245 const auto& vals = l.size.sizes(l.format);
246 const auto& add_offsets = view_offset.sizes(l.format);
247 const auto& lower_pad = pad.lower_size().sizes(l.format);
248 const auto& upper_pad = pad.upper_size().sizes(l.format);
249 const auto ks_layout = to_data_layout(l.format);
// One entry per channel of the target layout.
250 kernel_selector::n_dims vec(kernel_selector::DataTensor::ChannelsCount(ks_layout));
// Copy of the sizes used only as the "reserved in memory" counts.
255 auto new_vals = vals;
// For byxf_af32 the reserved count at index 3 is passed through
// align_to(..., 32); the logical extent in vals is left untouched.
257 if (ks_layout == kernel_selector::Tensor::byxf_af32)
259 new_vals[3] = align_to(vals[3], 32);
262 for (size_t i = 0; i < vec.size(); i++)
// Entry i of the result is taken from the source index counted from the back.
264 const size_t tensor_index = vec.size() - 1 - i;
265 const auto d = vals[tensor_index];
266 const auto lp = lower_pad[tensor_index];
267 const auto up = upper_pad[tensor_index];
268 // tells us how many elements are reserved in memory for this tensor index
269 const auto reserved_in_mem_count = new_vals[tensor_index];
// The stored extent excludes the view offset of this dimension.
272 elm.v = static_cast<size_t>(d - add_offsets[tensor_index]);
// The view offset advances the start by whole pitches of this dimension;
// the pitch then grows by the reserved count plus both paddings.
277 offset += pitch*(add_offsets[tensor_index]);
278 pitch *= (reserved_in_mem_count + lp + up);
// Only the feature dimension is divided by split.
281 const int feature_index = kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
282 vec[feature_index].v /= split;
284 return kernel_selector::data_tensor(
286 to_data_type(l.data_type),
// Converts a cldnn layout describing weights into a kernel_selector weights
// tensor. The tensor is described in the oiyx base layout and then passed
// through TransformIgnorePadding() with the layout that to_weights_layout()
// yields for l.format.
291 inline kernel_selector::weights_tensor convert_weights_tensor(const layout& l)
// Only 4-dimensional formats are expected here (checked by assert only).
293 assert(l.format.dimension() == 4);
// Sizes are always read in bfyx order, independent of l.format.
294 const auto& t = l.size.sizes(format::bfyx);
295 const auto base_layout = kernel_selector::weights_layout::oiyx;
296 const auto ks_type = to_weights_type(l.data_type);
297 const auto ks_layout = to_weights_layout(l.format);
// One entry per channel of the base layout.
298 std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(base_layout));
300 for (size_t i = 0; i < vec.size(); i++)
// vec is filled from the back of t, i.e. in reversed order.
302 const size_t tensor_index = t.size() - 1 - i;
303 const auto d = t[tensor_index];
304 vec[i] = static_cast<size_t>(d);
307 return kernel_selector::weights_tensor(
310 base_layout).TransformIgnorePadding(ks_layout);
// Selects a ReLU-style activation on params from the primitive's
// activation_negative_slope field.
//   primitive - pointer-like handle exposing activation_negative_slope.
//   params    - receives activationFunc and, on one path, activationParams.m.
313 template <typename p_type>
314 inline void convert_activation_func_params(const p_type primitive, kernel_selector::base_params& params)
316 const float negative_slope = primitive->activation_negative_slope;
// NOTE(review): the condition choosing between the two assignments of
// activationFunc below is not shown here - presumably a non-zero slope
// selects RELU_NEGATIVE_SLOPE; confirm.
319 params.activationParams.m = negative_slope;
320 params.activationFunc = kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
324 params.activationFunc = kernel_selector::activation_function::RELU;
// Translates a cldnn_activation_func selector into the corresponding
// kernel_selector activation function.
// Ends with a throw of std::runtime_error("Unknown activation function").
328 inline kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func)
330 switch (activation_func)
332 case activation_none:
333 return kernel_selector::activation_function::NONE;
334 case activation_logistic:
335 return kernel_selector::activation_function::LOGISTIC;
336 case activation_hyperbolic_tan:
337 return kernel_selector::activation_function::HYPERBOLIC_TAN;
338 case activation_relu:
339 return kernel_selector::activation_function::RELU;
340 case activation_relu_negative_slope:
341 return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
342 case activation_clamp:
343 return kernel_selector::activation_function::CLAMP;
344 case activation_softrelu:
345 return kernel_selector::activation_function::SOFTRELU;
// NOTE(review): the case label for the next return is not visible here -
// presumably the absolute-value selector; confirm.
347 return kernel_selector::activation_function::ABS;
348 case activation_linear:
349 return kernel_selector::activation_function::LINEAR;
350 case activation_square:
351 return kernel_selector::activation_function::SQUARE;
352 case activation_sqrt:
353 return kernel_selector::activation_function::SQRT;
// NOTE(review): the case label for the next return is not visible here -
// presumably the ELU selector; confirm.
355 return kernel_selector::activation_function::ELU;
// NOTE(review): presumably reached for selectors without an entry above; confirm.
357 throw std::runtime_error("Unknown activation function");
362 inline kernel_selector::activation_function get_kernel_selector_activation_grad_param(cldnn_activation_grad_func activation_grad_func)
364 switch (activation_grad_func)
366 case activation_grad_none:
367 return kernel_selector::activation_function::NONE_GRAD;
368 case activation_grad_relu:
369 return kernel_selector::activation_function::RELU_GRAD;
370 case activation_grad_relu_negative_slope:
371 return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE_GRAD;
373 throw std::runtime_error("Unknown activation_grad function");
378 template <typename arg_t>
379 inline void convert_fused_activation_func_params(const arg_t& arg, kernel_selector::base_params& params)
381 params.activationParams.m = arg.get_fused_activation_params().a;
382 params.activationParams.n = arg.get_fused_activation_params().b;
383 params.activationFunc = get_kernel_selector_activation_param(arg.get_fused_activation_func());
386 template <typename p_type>
387 inline void convert_new_activation_func(const p_type primitive, kernel_selector::base_params& params)
389 params.activationFunc = get_kernel_selector_activation_param(primitive->activation_func);
390 params.activationParams.m = primitive->additional_params.a;
391 params.activationParams.n = primitive->additional_params.b;
394 template <typename params_t, typename arg_t>
395 inline params_t get_default_params(const arg_t& arg, uint32_t split = 1)
399 const auto& context = arg.get_program().get_engine().get_context();
400 const auto& engine_info = context->get_engine_info();
402 params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups");
403 params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
404 params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16");
405 params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64");
406 params.engineInfo.bImageSupport = engine_info.supports_image != 0;
407 params.engineInfo.maxWorkGroupSize = engine_info.max_work_group_size;
408 params.engineInfo.maxLocalMemSize = engine_info.max_local_mem_size;
409 params.engineInfo.maxImage2dWidth = engine_info.max_image2d_width;
410 params.engineInfo.maxImage2dHeight = engine_info.max_image2d_height;
411 params.engineInfo.deviceId = engine_info.dev_id;
412 params.engineInfo.driverVersion = engine_info.driver_version;
413 params.engineInfo.hostVersion = to_host_version(cldnn::get_version());
415 const auto& input_layout = arg.input().get_output_layout();
416 const auto& output_layout = arg.get_output_layout();
418 params.inputs[0] = convert_data_tensor(input_layout, split);
419 params.output = convert_data_tensor(output_layout, split);
421 params.layerID = arg.id();
423 convert_fused_activation_func_params(arg, params);
// Extends get_default_params() with weights and bias tensors.
//   arg   - program node exposing weights() and bias().
//   split - forwarded to get_default_params().
428 template <typename params_t, typename arg_t>
429 inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1)
431 params_t params = get_default_params<params_t>(arg, split);
// Weights are described by the output layout of the weights node.
433 const auto& weights_layout = arg.weights().get_output_layout();
434 params.weights = convert_weights_tensor(weights_layout);
// NOTE(review): any guard around the bias handling below is not visible
// here - presumably it only runs when the node has a bias; confirm.
438 const auto& bias_layout = arg.bias().get_output_layout();
439 // bias per output is not supported on cldnn
// The bias tensor is appended after FlattenFeatureAndSpatials().
440 params.bias.push_back(convert_data_tensor(bias_layout).FlattenFeatureAndSpatials());
// Extends get_weights_bias_default_params() with learning settings.
//   arg   - program node; use_momentum() and the program's learning_config
//           build option are read.
//   split - forwarded to get_weights_bias_default_params().
446 template <typename params_t, typename arg_t>
447 inline params_t get_default_learning_params(const arg_t& arg, uint32_t split = 1)
449 params_t params = get_weights_bias_default_params<params_t>(arg, split);
// Copy of the params held by the learning_config build option.
451 const auto learning_params = arg.get_program().get_options().template get<build_option_type::learning_config>()->params;
453 if (arg.use_momentum())
455 params.use_momentum = true;
// NOTE(review): presumably the two assignments below run regardless of
// use_momentum(); the extent of the if is not visible here - confirm.
458 params.momentum_factor = learning_params.momentum;
459 params.weights_decay = learning_params.weights_decay;
464 template <typename optional_params_t>
465 inline optional_params_t get_default_optional_params(const program_impl& program)
467 optional_params_t params;
469 const auto& context = program.get_engine().get_context();
471 params.meaningfulKernelsNames = context->get_configuration().meaningful_kernels_names;
472 params.allowStaticInputReordering = program.get_options().get<build_option_type::optimize_data>()->enabled();
473 params.allowInputReordering = false;
474 params.allowOutputReordering = false;
476 const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
477 params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
478 params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
483 template <typename optional_params_t>
484 inline optional_params_t get_default_weights_bias_optional_params(const program_impl& program)
486 return get_default_optional_params<optional_params_t>(program);
489 template <typename optional_params_t>
490 inline optional_params_t get_default_learning_optional_params(const program_impl& program)
492 return get_default_weights_bias_optional_params<optional_params_t>(program);