inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp

   1 // Copyright (c) 2016-2019 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "kernel_selector_helper.h"
  16 #include "kernel_selector_params.h"
  17
  18 #include "gpu/ocl_toolkit.h"
  19
  20 #include "program_node.h"
  21 #include "program_impl.h"
  22
  23 #include "training_params.h"
  24
  25 kernel_selector::data_type to_data_type(data_types dt)
  26 {
  27     switch (dt)
  28     {
  29     case cldnn::data_types::i8:     return kernel_selector::data_type::INT8;
  30     case cldnn::data_types::u8:     return kernel_selector::data_type::UINT8;
  31     case cldnn::data_types::i32:     return kernel_selector::data_type::INT32;
  32     case cldnn::data_types::i64:     return kernel_selector::data_type::INT64;
  33     case cldnn::data_types::f16:    return kernel_selector::data_type::F16;
  34     case cldnn::data_types::f32:    return kernel_selector::data_type::F32;
  35     default:
  36         assert(0);
  37         return kernel_selector::data_type::F16;
  38     }
  39 }
  40
  41 data_types from_data_type(kernel_selector::data_type dt)
  42 {
  43     switch (dt)
  44     {
  45     case kernel_selector::data_type::INT8:   return cldnn::data_types::i8;
  46     case kernel_selector::data_type::UINT8:   return cldnn::data_types::u8;
  47     case kernel_selector::data_type::INT32:   return cldnn::data_types::i32;
  48     case kernel_selector::data_type::INT64:   return cldnn::data_types::i64;
  49     case kernel_selector::data_type::F16:    return cldnn::data_types::f16;
  50     case kernel_selector::data_type::F32:    return cldnn::data_types::f32;
  51     default:
  52         assert(0);
  53         return cldnn::data_types::f16;
  54     }
  55 }
  56
  57 kernel_selector::weights_type to_weights_type(data_types dt)
  58 {
  59     switch (dt)
  60     {
  61     case cldnn::data_types::i8:     return kernel_selector::weights_type::INT8;
  62     case cldnn::data_types::u8:     return kernel_selector::weights_type::UINT8;
  63     case cldnn::data_types::f16:    return kernel_selector::weights_type::F16;
  64     case cldnn::data_types::f32:    return kernel_selector::weights_type::F32;
  65     default:
  66         assert(0);
  67         return kernel_selector::weights_type::F16;
  68     }
  69 }
  70
  71 data_types from_weights_type(kernel_selector::weights_type dt)
  72 {
  73     switch (dt)
  74     {
  75     case kernel_selector::weights_type::INT8:   return data_types::i8;
  76     case kernel_selector::weights_type::UINT8:  return data_types::u8;
  77     case kernel_selector::weights_type::F16:    return data_types::f16;
  78     case kernel_selector::weights_type::F32:    return data_types::f32;
  79     default:
  80         assert(0);
  81         return data_types::f16;;
  82     }
  83 }
  84
  85 kernel_selector::data_layout to_data_layout(format f)
  86 {
  87     switch (f)
  88     {
  89     case format::bfyx:              return kernel_selector::data_layout::bfyx;
  90     case format::yxfb:              return kernel_selector::data_layout::yxfb;
  91     case format::byxf:              return kernel_selector::data_layout::byxf;
  92     case format::fyxb:              return kernel_selector::data_layout::fyxb;
  93     case format::bs_x_bsv16:        return kernel_selector::data_layout::bs_f_bsv16__af8;
  94     case format::bs_xs_xsv8_bsv8:   return kernel_selector::data_layout::bs_f_bsv8__af8;
  95     case format::bs_xs_xsv8_bsv16:  return kernel_selector::data_layout::bs_f_bsv16__af8;
  96     case format::bf8_xy16:          return kernel_selector::data_layout::bf8_xy16;
  97     case format::winograd_2x3_s1_data:  return kernel_selector::data_layout::winograd_2x3_s1_data;
  98     case format::byxf_af32: return kernel_selector::data_layout::byxf_af32;
  99     case format::byx8_f4: return kernel_selector::data_layout::byx8_f4;
 100     case format::fs_bs_yx_bsv4_fsv32: return kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32;
 101         //     case format::brfyx:          return kernel_selector::data_layout::brfyx;
 102     case format::b_fs_yx_fsv4:       return kernel_selector::data_layout::b_fs_yx_fsv4;
 103     default:
 104         return kernel_selector::data_layout::bfyx;
 105     }
 106 }
 107
 108 cldnn::format from_data_layout(kernel_selector::data_layout l)
 109 {
 110     switch (l)
 111     {
 112     case kernel_selector::data_layout::bf:                return cldnn::format::bfyx;
 113     case kernel_selector::data_layout::fb:                return cldnn::format::fyxb;
 114     case kernel_selector::data_layout::bfyx:              return cldnn::format::bfyx;
 115     case kernel_selector::data_layout::yxfb:              return cldnn::format::yxfb;
 116     case kernel_selector::data_layout::byxf:              return cldnn::format::byxf;
 117     case kernel_selector::data_layout::fyxb:              return cldnn::format::fyxb;
 118     case kernel_selector::data_layout::bs_f_bsv8__af8:    return cldnn::format::bs_xs_xsv8_bsv8;
 119     case kernel_selector::data_layout::bs_f_bsv16__af8:   return cldnn::format::bs_x_bsv16;
 120     case kernel_selector::data_layout::bf8_xy16:          return cldnn::format::bf8_xy16;
 121     case kernel_selector::data_layout::brfyx:             return cldnn::format::bfyx;
 122     case kernel_selector::data_layout::winograd_2x3_s1_data:   return cldnn::format::winograd_2x3_s1_data;
 123     case kernel_selector::data_layout::byxf_af32: return cldnn::format::byxf_af32;
 124     case kernel_selector::data_layout::byx8_f4: return cldnn::format::byx8_f4;
 125     case kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32: return cldnn::format::fs_bs_yx_bsv4_fsv32;
 126     default:
 127         return cldnn::format::bfyx;
 128         break;
 129     }
 130 }
 131
 132 kernel_selector::weights_layout to_weights_layout(format f)
 133 {
 134     switch (f)
 135     {
 136     case format::bfyx:              return kernel_selector::weights_layout::oiyx;
 137     case format::fyxb:              return kernel_selector::weights_layout::iyxo;
 138     case format::byxf:              return kernel_selector::weights_layout::oyxi;
 139     case format::yxfb:              return kernel_selector::weights_layout::yxio;
 140     case format::os_iyx_osv16:      return kernel_selector::weights_layout::os_iyx_osv16;
 141     case format::os_iyx_osv32:      return kernel_selector::weights_layout::os_iyx_osv32;
 142     case format::os_iyx_osv64:      return kernel_selector::weights_layout::os_iyx_osv64;
 143     case format::bs_xs_xsv8_bsv8:   return kernel_selector::weights_layout::os_i_osv8__ai8;
 144     case format::bs_xs_xsv8_bsv16:  return kernel_selector::weights_layout::os_i_osv16__ai8;
 145     case format::bs_x_bsv16:        return kernel_selector::weights_layout::os_i_osv16;
 146     case format::image_2d_weights_c4_fyx_b:     return kernel_selector::weights_layout::image_2d_weights_c4_fyx_b;
 147     case format::image_2d_weights_c1_b_fyx:     return kernel_selector::weights_layout::image_2d_weights_c1_b_fyx;
 148     case format::winograd_2x3_s1_weights:       return kernel_selector::weights_layout::winograd_2x3_s1_weights;
 149     case format::winograd_2x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_2x3_s1_fused_weights;
 150     case format::winograd_6x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_6x3_s1_fused_weights;
 151     case format::image_2d_weights_winograd_6x3_s1_fbxyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb;
 152     case format::image_2d_weights_winograd_6x3_s1_xfbyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb;
 153     case format::os_is_yx_isa8_osv8_isv4:                    return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
 154     case format::os_is_yx_isa8_osv8_isv4_swizzled_by_4:      return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
 155     case format::is_o_yx_isv32: return kernel_selector::weights_layout::is_o_yx_isv32;
 156     case format::is_o32_yx_isv32_swizzled_by_4: return kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4;
 157     case format::os_is_y_x8_osv8_isv4: return kernel_selector::weights_layout::os_is_y_x8_osv8_isv4;
 158     case format::bf_lyx_yx:                                  return kernel_selector::weights_layout::bf_lyx_yx;
 159     case format::os_is_yx_osv16_isv4:  return kernel_selector::weights_layout::os_is_yx_osv16_isv4;
 160     default:
 161         return kernel_selector::weights_layout::oi;
 162     }
 163 }
 164
 165 cldnn::format::type from_weights_layout(kernel_selector::weights_layout l)
 166 {
 167     switch (l)
 168     {
 169     case kernel_selector::weights_layout::oi:
 170     case kernel_selector::weights_layout::oiyx:            return cldnn::format::bfyx;
 171     case kernel_selector::weights_layout::oyxi:            return cldnn::format::byxf;
 172     case kernel_selector::weights_layout::io:
 173     case kernel_selector::weights_layout::iyxo:            return cldnn::format::fyxb;
 174     case kernel_selector::weights_layout::yxio:            return cldnn::format::yxfb;
 175     case kernel_selector::weights_layout::os_iyx_osv16:    return cldnn::format::os_iyx_osv16;
 176     case kernel_selector::weights_layout::os_iyx_osv32:    return cldnn::format::os_iyx_osv32;
 177     case kernel_selector::weights_layout::os_iyx_osv64:    return cldnn::format::os_iyx_osv64;
 178     case kernel_selector::weights_layout::os_i_osv16:      return cldnn::format::bs_x_bsv16;
 179     case kernel_selector::weights_layout::os_i_osv8__ai8:  return cldnn::format::bs_xs_xsv8_bsv8;
 180     case kernel_selector::weights_layout::os_i_osv16__ai8: return cldnn::format::bs_xs_xsv8_bsv16;
 181     case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b:     return cldnn::format::image_2d_weights_c4_fyx_b;
 182     case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx:     return cldnn::format::image_2d_weights_c1_b_fyx;
 183     case kernel_selector::weights_layout::winograd_2x3_s1_weights:       return cldnn::format::winograd_2x3_s1_weights;
 184     case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights: return cldnn::format::winograd_2x3_s1_fused_weights;
 185     case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights: return cldnn::format::winograd_6x3_s1_fused_weights;
 186     case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
 187     case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
 188     case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4:                return cldnn::format::os_is_yx_isa8_osv8_isv4;
 189     case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4:  return cldnn::format::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
 190     case kernel_selector::weights_layout::is_o_yx_isv32:                          return cldnn::format::is_o_yx_isv32;
 191     case kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4: return cldnn::format::is_o32_yx_isv32_swizzled_by_4;
 192     case kernel_selector::weights_layout::os_is_y_x8_osv8_isv4: return cldnn::format::os_is_y_x8_osv8_isv4;
 193     case kernel_selector::weights_layout::bf_lyx_yx:                              return cldnn::format::bf_lyx_yx;
 194     default:
 195         return cldnn::format::bfyx;
 196     }
 197 }
 198
 199 kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode)
 200 {
 201     switch (mode)
 202     {
 203     case cldnn::tuning_mode::tuning_disabled:         return kernel_selector::tuning_mode::TUNING_DISABLED;
 204     case cldnn::tuning_mode::tuning_use_cache:        return kernel_selector::tuning_mode::TUNING_USE_CACHE;
 205     case cldnn::tuning_mode::tuning_tune_and_cache:   return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
 206     default:
 207         return kernel_selector::tuning_mode::TUNING_DISABLED;
 208     }
 209 }
 210
 211 std::string to_host_version(const cldnn::version_t& version)
 212 {
 213     std::stringstream ss;
 214     ss << version.major << "." << version.minor << "." << version.build << "." << version.revision;
 215     return ss.str();
 216 }
 217
 218 kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split, const tensor view_offset)
 219 {
 220     const auto& pad = l.data_padding;
 221     const auto& vals = l.size.sizes(l.format);
 222     const auto& add_offsets = view_offset.sizes(l.format);
 223     const auto& lower_pad = pad.lower_size().sizes(l.format);
 224     const auto& upper_pad = pad.upper_size().sizes(l.format);
 225     const auto ks_layout = to_data_layout(l.format);
 226     kernel_selector::n_dims vec(kernel_selector::DataTensor::ChannelsCount(ks_layout));
 227
 228     size_t pitch = 1;
 229     size_t offset = 0;
 230
 231     auto new_vals = vals;
 232
 233     if (ks_layout == kernel_selector::Tensor::byxf_af32)
 234     {
 235         new_vals[3] = align_to(vals[3], 32);
 236     }
 237     if (ks_layout == kernel_selector::Tensor::fs_bs_yx_bsv4_fsv32)
 238     {
 239         new_vals[3] = align_to(vals[3], 32);
 240         new_vals[2] = align_to(vals[2], 4);
 241     }
 242     if (ks_layout == kernel_selector::Tensor::byx8_f4)
 243     {
 244         new_vals[3] = align_to(vals[3], 4);
 245         new_vals[2] = align_to(vals[2], 8);
 246     }
 247
 248     for (size_t i = 0; i < vec.size(); i++)
 249     {
 250         const size_t tensor_index = vec.size() - 1 - i;
 251         const auto d = vals[tensor_index];
 252         const auto lp = lower_pad[tensor_index];
 253         const auto up = upper_pad[tensor_index];
 254         // tells us how many elements are reserved in memory for this tensor index
 255         const auto reserved_in_mem_count = new_vals[tensor_index];
 256
 257         auto& elm = vec[i];
 258         elm.v = static_cast<size_t>(d - add_offsets[tensor_index]);
 259         elm.pitch = pitch;
 260         elm.pad.before = lp;
 261         elm.pad.after = up;
 262
 263         offset += pitch * (add_offsets[tensor_index]);
 264         pitch *= (reserved_in_mem_count + lp + up);
 265     }
 266
 267     const int feature_index = kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
 268     vec[feature_index].v /= split;
 269
 270     return kernel_selector::data_tensor(
 271         vec,
 272         to_data_type(l.data_type),
 273         ks_layout,
 274         offset);
 275 }
 276
 277 kernel_selector::weights_tensor convert_weights_tensor(const layout& l)
 278 {
 279     const auto& t = l.size.sizes(l.format);
 280     const auto base_layout = to_weights_layout(l.format);
 281     const auto ks_type = to_weights_type(l.data_type);
 282     const auto ks_layout = to_weights_layout(l.format);
 283     std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(base_layout));
 284
 285     for (size_t i = 0; i < vec.size(); i++)
 286     {
 287         const size_t tensor_index = t.size() - 1 - i;
 288         const auto d = t[tensor_index];
 289         vec[i] = static_cast<size_t>(d);
 290     }
 291
 292     return kernel_selector::weights_tensor(
 293         vec,
 294         ks_type,
 295         base_layout).TransformIgnorePadding(ks_layout);
 296 }
 297
 298 kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func)
 299 {
 300     switch (activation_func)
 301     {
 302     case activation_none:
 303         return kernel_selector::activation_function::NONE;
 304     case activation_logistic:
 305         return kernel_selector::activation_function::LOGISTIC;
 306     case activation_hyperbolic_tan:
 307         return kernel_selector::activation_function::HYPERBOLIC_TAN;
 308     case activation_relu:
 309         return kernel_selector::activation_function::RELU;
 310     case activation_relu_negative_slope:
 311         return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
 312     case activation_clamp:
 313         return kernel_selector::activation_function::CLAMP;
 314     case activation_softrelu:
 315         return kernel_selector::activation_function::SOFTRELU;
 316     case activation_abs:
 317         return kernel_selector::activation_function::ABS;
 318     case activation_linear:
 319         return kernel_selector::activation_function::LINEAR;
 320     case activation_square:
 321         return kernel_selector::activation_function::SQUARE;
 322     case activation_sqrt:
 323         return kernel_selector::activation_function::SQRT;
 324     case activation_elu:
 325         return kernel_selector::activation_function::ELU;
 326     case activation_sin:
 327         return kernel_selector::activation_function::SIN;
 328     case activation_asin:
 329         return kernel_selector::activation_function::ASIN;
 330     case activation_sinh:
 331         return kernel_selector::activation_function::SINH;
 332     case activation_cos:
 333         return kernel_selector::activation_function::COS;
 334     case activation_acos:
 335         return kernel_selector::activation_function::ACOS;
 336     case activation_cosh:
 337         return kernel_selector::activation_function::COSH;
 338     case activation_log:
 339         return kernel_selector::activation_function::LOG;
 340     case activation_log2:
 341         return kernel_selector::activation_function::LOG2;
 342     case activation_exp:
 343         return kernel_selector::activation_function::EXP;
 344     case activation_not:
 345         return kernel_selector::activation_function::NOT;
 346     default:
 347         throw std::runtime_error("Unknown activation function");
 348         break;
 349     }
 350 }
 351
 352 kernel_selector::activation_function get_kernel_selector_activation_grad_param(cldnn_activation_grad_func activation_grad_func)
 353 {
 354     switch (activation_grad_func)
 355     {
 356     case activation_grad_none:
 357         return kernel_selector::activation_function::NONE_GRAD;
 358     case activation_grad_relu:
 359         return kernel_selector::activation_function::RELU_GRAD;
 360     case activation_grad_relu_negative_slope:
 361         return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE_GRAD;
 362     default:
 363         throw std::runtime_error("Unknown activation_grad function");
 364         break;
 365     }
 366 }
 367
 368 void set_params(const program_node& node, kernel_selector::params& params)
 369 {
 370     const auto& context = node.get_program().get_engine().get_context();
 371     const auto& engine_info = context->get_engine_info();
 372
 373     params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups");
 374     params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
 375     params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16");
 376     params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64");
 377     params.engineInfo.bIMADSupport = engine_info.supports_imad != 0;
 378     params.engineInfo.bIMMADSupport = engine_info.supports_immad != 0;
 379     params.engineInfo.bImageSupport = engine_info.supports_image != 0;
 380     params.engineInfo.maxWorkGroupSize = engine_info.max_work_group_size;
 381     params.engineInfo.maxLocalMemSize = engine_info.max_local_mem_size;
 382     params.engineInfo.maxImage2dWidth = engine_info.max_image2d_width;
 383     params.engineInfo.maxImage2dHeight = engine_info.max_image2d_height;
 384     params.engineInfo.deviceId = engine_info.dev_id;
 385     params.engineInfo.computeUnitsCount = engine_info.compute_units_count;
 386     params.engineInfo.deviceCache = engine_info.device_cache;
 387     params.engineInfo.driverVersion = engine_info.driver_version;
 388     params.engineInfo.hostVersion = to_host_version(cldnn::get_version());
 389 }
 390
 391 void set_learning_params(const program_node& node, kernel_selector::training_params& params, bool use_momentum)
 392 {
 393     const auto learning_params = node.get_program().get_options().template get<build_option_type::learning_config>()->params;
 394
 395     if (use_momentum)
 396     {
 397         params.use_momentum = true;
 398     }
 399
 400     params.momentum_factor = learning_params.momentum;
 401     params.weights_decay = learning_params.weights_decay;
 402 }
 403
 404 void set_optional_params(const program_impl& program, kernel_selector::optional_params& params)
 405 {
 406     const auto& context = program.get_engine().get_context();
 407
 408     params.meaningfulKernelsNames = context->get_configuration().meaningful_kernels_names;
 409     params.allowStaticInputReordering = program.get_options().get<build_option_type::optimize_data>()->enabled();
 410     params.allowInputReordering = false;
 411     params.allowOutputReordering = false;
 412
 413     const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
 414     params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
 415     params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
 416 }