inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #pragma once
  18 #include "api/C/cldnn.h"
  19 #include "api/CPP/program.hpp"
  20 #include "program_impl.h"
  21 #include "gpu/ocl_toolkit.h"
  22 #include "tensor_type.h"
  23 #include "kernel_selector_params.h"
  24 #include "kernel_selector_common.h"
  25 #include "jitter.h"
  26
  27 using namespace cldnn;
  28
  29 namespace kernel_selector
  30 {
  31     using n_dims                            = kernel_selector::Tensor::NDims;
  32     using kernel_data                       = kernel_selector::KernelData;
  33     using kernel_string                     = kernel_selector::KernelString;
  34     using cl_kernel_data                    = kernel_selector::clKernelData;
  35     using kernel_arguments                  = kernel_selector::Arguments;
  36     using kernel_argument_element           = kernel_selector::ArgumentDescriptor;
  37     using kernel_argument_types             = kernel_selector::ArgumentDescriptor::Types;
  38     using kernel_scalar_arguments           = kernel_selector::Scalars;
  39     using kernel_scalar_argument_types      = kernel_selector::ScalarDescriptor::Types;
  40     using jit_constants                     = kernel_selector::JitConstants;
  41
  42     using data_type                         = kernel_selector::Datatype;
  43     using kernel_type                       = kernel_selector::KernelType;
  44     using weights_type                      = kernel_selector::WeightsType;
  45     using activation_function               = kernel_selector::ActivationFunction;
  46     using pool_type                         = kernel_selector::PoolType;
  47     using pool_remainder                    = kernel_selector::PoolRemainder;
  48         using argm_axis                                                 = kernel_selector::ArgMaxMinAxis;
  49         using argm_output                                               = kernel_selector::ArgMaxMinOut;
  50     using lookt_axis                        = kernel_selector::LookUpTableAxis;
  51     using lrn_mode                          = kernel_selector::LRNMode;
  52     using normalize_mode                    = kernel_selector::NormalizeMode;
  53     using mvn_mode                          = kernel_selector::MVNMode;
  54     using kernel_divider_mode               = kernel_selector::KernelDividerMode;
  55     using eltwise_mode                      = kernel_selector::EltwiseMode;
  56     using eltwise_input_mode                = kernel_selector::EltwiseInputMode;
  57     using softmax_dim                       = kernel_selector::SoftmaxDim;
  58     using mean_subtruct_mode                = kernel_selector::MeanSubtractMode;
  59     using mean_op                           = kernel_selector::MeanOp;
  60     using concat_axis                       = kernel_selector::ConcatAxis;
  61     using tuning_mode                       = kernel_selector::TuningMode;
  62     using sample_type                       = kernel_selector::SampleType;
  63
  64     using data_tensor                       = kernel_selector::DataTensor;
  65     using weights_tensor                    = kernel_selector::WeightsTensor;
  66     using data_layout                       = kernel_selector::DataLayout;
  67     using weights_layout                    = kernel_selector::WeightsLayout;
  68     using multi_data_tensor                 = kernel_selector::MultiDataTensor;
  69
  70     using params                            = kernel_selector::Params;
  71     using weights_reorder_params            = kernel_selector::WeightsReorderParams;
  72     using generic_kernel_params             = kernel_selector::GenericKernelParams;
  73 }
  74
  75 inline kernel_selector::data_type to_data_type(data_types dt)
  76 {
  77     switch (dt)
  78     {
  79     case cldnn::data_types::i8:     return kernel_selector::data_type::INT8;
  80     case cldnn::data_types::u8:     return kernel_selector::data_type::UINT8;
  81     case cldnn::data_types::f16:    return kernel_selector::data_type::F16;
  82     case cldnn::data_types::f32:    return kernel_selector::data_type::F32;
  83     default:
  84         assert(0);
  85         return kernel_selector::data_type::F16;
  86     }
  87 }
  88
  89 inline data_types from_data_type(kernel_selector::data_type dt)
  90 {
  91     switch (dt)
  92     {
  93     case kernel_selector::data_type::INT8:   return cldnn::data_types::i8;
  94     case kernel_selector::data_type::UINT8:   return cldnn::data_types::u8;
  95     case kernel_selector::data_type::F16:    return cldnn::data_types::f16;
  96     case kernel_selector::data_type::F32:    return cldnn::data_types::f32;
  97     default:
  98         assert(0);
  99         return cldnn::data_types::f16;
 100     }
 101 }
 102
 103 inline kernel_selector::weights_type to_weights_type(data_types dt)
 104 {
 105     switch (dt)
 106     {
 107     case cldnn::data_types::i8:     return kernel_selector::weights_type::INT8;
 108     case cldnn::data_types::f16:    return kernel_selector::weights_type::F16;
 109     case cldnn::data_types::f32:    return kernel_selector::weights_type::F32;
 110     default:
 111         assert(0);
 112         return kernel_selector::weights_type::F16;
 113     }
 114 }
 115
 116 inline data_types from_weights_type(kernel_selector::weights_type dt)
 117 {
 118     switch (dt)
 119     {
 120     case kernel_selector::weights_type::INT8:   return data_types::i8;
 121     case kernel_selector::weights_type::F16:    return data_types::f16;
 122     case kernel_selector::weights_type::F32:    return data_types::f32;
 123     default:
 124         assert(0);
 125         return data_types::f16;;
 126     }
 127 }
 128
 129 inline kernel_selector::data_layout to_data_layout(format f)
 130 {
 131     switch (f)
 132     {
 133     case format::bfyx:              return kernel_selector::data_layout::bfyx;
 134     case format::yxfb:              return kernel_selector::data_layout::yxfb;
 135     case format::byxf:              return kernel_selector::data_layout::byxf;
 136     case format::fyxb:              return kernel_selector::data_layout::fyxb;
 137     case format::bs_x_bsv16:        return kernel_selector::data_layout::bs_f_bsv16__af8;
 138     case format::bs_xs_xsv8_bsv8:   return kernel_selector::data_layout::bs_f_bsv8__af8;
 139     case format::bs_xs_xsv8_bsv16:  return kernel_selector::data_layout::bs_f_bsv16__af8;
 140     case format::bf8_xy16:          return kernel_selector::data_layout::bf8_xy16;
 141     case format::winograd_2x3_s1_data:  return kernel_selector::data_layout::winograd_2x3_s1_data;
 142     case format::byxf_af32: return kernel_selector::data_layout::byxf_af32;
 143 //     case format::brfyx:          return kernel_selector::data_layout::brfyx;
 144     default:
 145         return kernel_selector::data_layout::bfyx;
 146     }
 147 }
 148
 149 static inline cldnn::format from_data_layout(kernel_selector::data_layout l)
 150 {
 151     switch (l)
 152     {
 153     case kernel_selector::data_layout::bf:                return cldnn::format::bfyx;
 154     case kernel_selector::data_layout::fb:                return cldnn::format::fyxb;
 155     case kernel_selector::data_layout::bfyx:              return cldnn::format::bfyx;
 156     case kernel_selector::data_layout::yxfb:              return cldnn::format::yxfb;
 157     case kernel_selector::data_layout::byxf:              return cldnn::format::byxf;
 158     case kernel_selector::data_layout::fyxb:              return cldnn::format::fyxb;
 159     case kernel_selector::data_layout::bs_f_bsv8__af8:    return cldnn::format::bs_xs_xsv8_bsv8;
 160     case kernel_selector::data_layout::bs_f_bsv16__af8:   return cldnn::format::bs_x_bsv16;
 161     case kernel_selector::data_layout::bf8_xy16:          return cldnn::format::bf8_xy16;
 162     case kernel_selector::data_layout::brfyx:             return cldnn::format::bfyx;
 163     case kernel_selector::data_layout::winograd_2x3_s1_data:   return cldnn::format::winograd_2x3_s1_data;
 164     case kernel_selector::data_layout::byxf_af32: return cldnn::format::byxf_af32;
 165     default:
 166         return cldnn::format::bfyx;
 167         break;
 168     }
 169 }
 170
 171 inline kernel_selector::weights_layout to_weights_layout(format f)
 172 {
 173     switch (f)
 174     {
 175     case format::bfyx:              return kernel_selector::weights_layout::oiyx;
 176     case format::fyxb:              return kernel_selector::weights_layout::iyxo;
 177     case format::byxf:              return kernel_selector::weights_layout::oyxi;
 178     case format::yxfb:              return kernel_selector::weights_layout::yxio;
 179     case format::os_iyx_osv16:      return kernel_selector::weights_layout::os_iyx_osv16;
 180     case format::bs_xs_xsv8_bsv8:   return kernel_selector::weights_layout::os_i_osv8__ai8;
 181     case format::bs_xs_xsv8_bsv16:  return kernel_selector::weights_layout::os_i_osv16__ai8;
 182     case format::bs_x_bsv16:        return kernel_selector::weights_layout::os_i_osv16;
 183     case format::image_2d_weights_c4_fyx_b:     return kernel_selector::weights_layout::image_2d_weights_c4_fyx_b;
 184     case format::image_2d_weights_c1_b_fyx:     return kernel_selector::weights_layout::image_2d_weights_c1_b_fyx;
 185     case format::winograd_2x3_s1_weights:       return kernel_selector::weights_layout::winograd_2x3_s1_weights;
 186     case format::winograd_2x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_2x3_s1_fused_weights;
 187     case format::winograd_6x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_6x3_s1_fused_weights;
 188     case format::image_2d_weights_winograd_6x3_s1_fbxyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb;
 189     case format::image_2d_weights_winograd_6x3_s1_xfbyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb;
 190     case format::os_is_yx_isa8_osv8_isv4: return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
 191     default:
 192         return kernel_selector::weights_layout::oi;
 193     }
 194 }
 195
 196 static inline cldnn::format::type from_weights_layout(kernel_selector::weights_layout l)
 197 {
 198     switch (l)
 199     {
 200     case kernel_selector::weights_layout::oi:
 201     case kernel_selector::weights_layout::oiyx:               return cldnn::format::bfyx;
 202     case kernel_selector::weights_layout::oyxi:               return cldnn::format::byxf;
 203     case kernel_selector::weights_layout::io:
 204     case kernel_selector::weights_layout::iyxo:               return cldnn::format::fyxb;
 205     case kernel_selector::weights_layout::yxio:               return cldnn::format::yxfb;
 206     case kernel_selector::weights_layout::os_iyx_osv16:       return cldnn::format::os_iyx_osv16;
 207     case kernel_selector::weights_layout::os_i_osv16:         return cldnn::format::bs_x_bsv16;
 208     case kernel_selector::weights_layout::os_i_osv8__ai8:     return cldnn::format::bs_xs_xsv8_bsv8;
 209     case kernel_selector::weights_layout::os_i_osv16__ai8:    return cldnn::format::bs_xs_xsv8_bsv16;
 210     case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b:        return cldnn::format::image_2d_weights_c4_fyx_b;
 211     case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx:        return cldnn::format::image_2d_weights_c1_b_fyx;
 212     case kernel_selector::weights_layout::winograd_2x3_s1_weights:          return cldnn::format::winograd_2x3_s1_weights;
 213     case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights:    return cldnn::format::winograd_2x3_s1_fused_weights;
 214     case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights:    return cldnn::format::winograd_6x3_s1_fused_weights;
 215     case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb:        return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
 216     case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb:        return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
 217     case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4: return cldnn::format::os_is_yx_isa8_osv8_isv4;
 218     default:
 219         return cldnn::format::bfyx;
 220     }
 221 }
 222
 223 inline kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode)
 224 {
 225     switch (mode)
 226     {
 227     case cldnn::tuning_mode::tuning_disabled:         return kernel_selector::tuning_mode::TUNING_DISABLED;
 228     case cldnn::tuning_mode::tuning_use_cache:        return kernel_selector::tuning_mode::TUNING_USE_CACHE;
 229     case cldnn::tuning_mode::tuning_tune_and_cache:   return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
 230     default:
 231         return kernel_selector::tuning_mode::TUNING_DISABLED;
 232     }
 233 }
 234
 235 inline std::string to_host_version(const cldnn::version_t& version)
 236 {
 237     std::stringstream ss;
 238     ss << version.major << "." << version.minor << "." << version.build << "." << version.revision;
 239     return ss.str();
 240 }
 241
 242 inline kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split = 1, const tensor view_offset = {})
 243 {
 244     const auto& pad = l.data_padding;
 245     const auto& vals = l.size.sizes(l.format);
 246     const auto& add_offsets = view_offset.sizes(l.format);
 247     const auto& lower_pad = pad.lower_size().sizes(l.format);
 248     const auto& upper_pad = pad.upper_size().sizes(l.format);
 249     const auto ks_layout = to_data_layout(l.format);
 250     kernel_selector::n_dims vec(kernel_selector::DataTensor::ChannelsCount(ks_layout));
 251
 252     size_t pitch = 1;
 253     size_t offset = 0;
 254
 255     auto new_vals = vals;
 256
 257     if (ks_layout == kernel_selector::Tensor::byxf_af32)
 258     {
 259         new_vals[3] = align_to(vals[3], 32);
 260     }
 261
 262     for (size_t i = 0; i < vec.size(); i++)
 263     {
 264         const size_t tensor_index = vec.size() - 1 - i;
 265         const auto d = vals[tensor_index];
 266         const auto lp = lower_pad[tensor_index];
 267         const auto up = upper_pad[tensor_index];
 268         // tells us how many elements are reserved in memory for this tensor index
 269         const auto reserved_in_mem_count = new_vals[tensor_index];
 270
 271         auto& elm = vec[i];
 272         elm.v = static_cast<size_t>(d - add_offsets[tensor_index]);
 273         elm.pitch = pitch;
 274         elm.pad.before = lp;
 275         elm.pad.after = up;
 276
 277         offset += pitch*(add_offsets[tensor_index]);
 278         pitch *= (reserved_in_mem_count + lp + up);
 279     }
 280
 281     const int feature_index = kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
 282     vec[feature_index].v /= split;
 283
 284     return kernel_selector::data_tensor(
 285         vec,
 286         to_data_type(l.data_type),
 287         ks_layout,
 288         offset);
 289 }
 290
 291 inline kernel_selector::weights_tensor convert_weights_tensor(const layout& l)
 292 {
 293     assert(l.format.dimension() == 4);
 294     const auto& t = l.size.sizes(format::bfyx);
 295     const auto base_layout = kernel_selector::weights_layout::oiyx;
 296     const auto ks_type = to_weights_type(l.data_type);
 297     const auto ks_layout = to_weights_layout(l.format);
 298     std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(base_layout));
 299
 300     for (size_t i = 0; i < vec.size(); i++)
 301     {
 302         const size_t tensor_index = t.size() - 1 - i;
 303         const auto d = t[tensor_index];
 304         vec[i] = static_cast<size_t>(d);
 305     }
 306
 307     return kernel_selector::weights_tensor(
 308         vec,
 309         ks_type,
 310         base_layout).TransformIgnorePadding(ks_layout);
 311 }
 312
 313 template <typename p_type>
 314 inline void convert_activation_func_params(const p_type primitive, kernel_selector::base_params& params)
 315 {
 316     const float negative_slope = primitive->activation_negative_slope;
 317     if (negative_slope)
 318     {
 319         params.activationParams.m = negative_slope;
 320         params.activationFunc = kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
 321     }
 322     else
 323     {
 324         params.activationFunc = kernel_selector::activation_function::RELU;
 325     }
 326 }
 327
 328 inline kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func)
 329 {
 330     switch (activation_func)
 331     {
 332     case activation_none:
 333         return kernel_selector::activation_function::NONE;
 334     case activation_logistic:
 335         return kernel_selector::activation_function::LOGISTIC;
 336     case activation_hyperbolic_tan:
 337         return kernel_selector::activation_function::HYPERBOLIC_TAN;
 338     case activation_relu:
 339         return kernel_selector::activation_function::RELU;
 340     case activation_relu_negative_slope:
 341         return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
 342     case activation_clamp:
 343         return kernel_selector::activation_function::CLAMP;
 344     case activation_softrelu:
 345         return kernel_selector::activation_function::SOFTRELU;
 346     case activation_abs:
 347         return kernel_selector::activation_function::ABS;
 348     case activation_linear:
 349         return kernel_selector::activation_function::LINEAR;
 350     case activation_square:
 351         return kernel_selector::activation_function::SQUARE;
 352     case activation_sqrt:
 353         return kernel_selector::activation_function::SQRT;
 354     case activation_elu:
 355         return kernel_selector::activation_function::ELU;
 356     default:
 357         throw std::runtime_error("Unknown activation function");
 358         break;
 359     }
 360 }
 361
 362 inline kernel_selector::activation_function get_kernel_selector_activation_grad_param(cldnn_activation_grad_func activation_grad_func)
 363 {
 364     switch (activation_grad_func)
 365     {
 366     case activation_grad_none:
 367         return kernel_selector::activation_function::NONE_GRAD;
 368     case activation_grad_relu:
 369         return kernel_selector::activation_function::RELU_GRAD;
 370     case activation_grad_relu_negative_slope:
 371         return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE_GRAD;
 372     default:
 373         throw std::runtime_error("Unknown activation_grad function");
 374         break;
 375     }
 376 }
 377
 378 template <typename arg_t>
 379 inline void convert_fused_activation_func_params(const arg_t& arg, kernel_selector::base_params& params)
 380 {
 381     params.activationParams.m = arg.get_fused_activation_params().a;
 382     params.activationParams.n = arg.get_fused_activation_params().b;
 383     params.activationFunc = get_kernel_selector_activation_param(arg.get_fused_activation_func());
 384 }
 385
 386 template <typename p_type>
 387 inline void convert_new_activation_func(const p_type primitive, kernel_selector::base_params& params)
 388 {
 389     params.activationFunc = get_kernel_selector_activation_param(primitive->activation_func);
 390     params.activationParams.m = primitive->additional_params.a;
 391     params.activationParams.n = primitive->additional_params.b;
 392 }
 393
 394 template <typename params_t, typename arg_t>
 395 inline params_t get_default_params(const arg_t& arg, uint32_t split = 1)
 396 {
 397     params_t params;
 398
 399     const auto& context = arg.get_program().get_engine().get_context();
 400     const auto& engine_info = context->get_engine_info();
 401
 402     params.engineInfo.bSubGroupSupport      = context->extension_supported("cl_intel_subgroups");
 403     params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
 404     params.engineInfo.bFP16Support          = context->extension_supported("cl_khr_fp16");
 405     params.engineInfo.bFP64Support          = context->extension_supported("cl_khr_fp64");
 406     params.engineInfo.bImageSupport         = engine_info.supports_image != 0;
 407     params.engineInfo.maxWorkGroupSize      = engine_info.max_work_group_size;
 408     params.engineInfo.maxLocalMemSize       = engine_info.max_local_mem_size;
 409     params.engineInfo.maxImage2dWidth       = engine_info.max_image2d_width;
 410     params.engineInfo.maxImage2dHeight      = engine_info.max_image2d_height;
 411     params.engineInfo.deviceId              = engine_info.dev_id;
 412     params.engineInfo.driverVersion         = engine_info.driver_version;
 413     params.engineInfo.hostVersion           = to_host_version(cldnn::get_version());
 414
 415     const auto& input_layout    = arg.input().get_output_layout();
 416     const auto& output_layout   = arg.get_output_layout();
 417
 418     params.inputs[0] = convert_data_tensor(input_layout, split);
 419     params.output = convert_data_tensor(output_layout, split);
 420
 421     params.layerID = arg.id();
 422
 423     convert_fused_activation_func_params(arg, params);
 424
 425     return params;
 426 }
 427
 428 template <typename params_t, typename arg_t>
 429 inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1)
 430 {
 431     params_t params = get_default_params<params_t>(arg, split);
 432
 433     const auto& weights_layout = arg.weights().get_output_layout();
 434     params.weights = convert_weights_tensor(weights_layout);
 435
 436     if (arg.bias_term())
 437     {
 438         const auto& bias_layout = arg.bias().get_output_layout();
 439         // bias per output is not supported on cldnn
 440         params.bias.push_back(convert_data_tensor(bias_layout).FlattenFeatureAndSpatials());
 441     }
 442
 443     return params;
 444 }
 445
 446 template <typename params_t, typename arg_t>
 447 inline params_t get_default_learning_params(const arg_t& arg, uint32_t split = 1)
 448 {
 449         params_t params = get_weights_bias_default_params<params_t>(arg, split);
 450
 451         const auto learning_params = arg.get_program().get_options().template get<build_option_type::learning_config>()->params;
 452
 453         if (arg.use_momentum())
 454         {
 455                 params.use_momentum = true;
 456         }
 457
 458         params.momentum_factor = learning_params.momentum;
 459         params.weights_decay = learning_params.weights_decay;
 460
 461         return params;
 462 }
 463
 464 template <typename optional_params_t>
 465 inline optional_params_t get_default_optional_params(const program_impl& program)
 466 {
 467     optional_params_t params;
 468
 469     const auto& context = program.get_engine().get_context();
 470
 471     params.meaningfulKernelsNames       = context->get_configuration().meaningful_kernels_names;
 472     params.allowStaticInputReordering   = program.get_options().get<build_option_type::optimize_data>()->enabled();
 473     params.allowInputReordering         = false;
 474     params.allowOutputReordering        = false;
 475
 476     const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
 477     params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
 478     params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
 479
 480     return params;
 481 }
 482
 483 template <typename optional_params_t>
 484 inline optional_params_t get_default_weights_bias_optional_params(const program_impl& program)
 485 {
 486     return get_default_optional_params<optional_params_t>(program);
 487 }
 488
 489 template <typename optional_params_t>
 490 inline optional_params_t get_default_learning_optional_params(const program_impl& program)
 491 {
 492         return get_default_weights_bias_optional_params<optional_params_t>(program);
 493 }