2 // Copyright (c) 2016-2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
22 #include "common_types.h"
23 #include "tensor_type.h"
namespace kernel_selector
// Convenience aliases for the tensor types used throughout the kernel
// selector. MultiDataTensor bundles the tensors of a multi-input primitive.
using DataTensor = Tensor::DataTensor;
using WeightsTensor = Tensor::WeightsTensor;
using DataLayout = Tensor::DataLayout;
using WeightsLayout = Tensor::WeightsLayout;
using MultiDataTensor = std::vector<DataTensor>;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// NOTE(review): the assignments below are a fragment of the ParamsKey
// constructor/reset (its signature is outside this view). They zero every
// capability mask so a fresh key starts with no capabilities enabled.
key.machineInfo.raw = 0;
key.inputType.raw = 0;
key.outputType.raw = 0;
key.inputWeightsType.raw = 0;
key.outputWeightsType.raw = 0;
key.weightsInputLayout = 0;
key.weightsOutputLayout = 0;
// Single-bit capability flags packed into the ParamsKey restrict/machineInfo
// unions. The enclosing struct/union declarations are not visible in this
// view; field groups below belong to per-primitive "dedicated" sub-structs.
uint32_t different_types : 1;
uint32_t different_input_weights_types : 1;
uint32_t batching : 1;
uint32_t biasPerFeatureMap : 1;
uint32_t biasPerOutput : 1;
uint32_t activationAdditionalParamsAsInput : 1;
uint32_t FP16Emulation : 1;
uint32_t gradient : 1;
uint32_t momentum : 1;
// Axis / index-format flags; the same field names repeat across several
// per-primitive structs (argmax/min, lookup table, etc.).
uint32_t axisFeature : 1;
uint32_t axisBatch : 1;
uint32_t indicesF32 : 1;
uint32_t indicesOther : 1;
uint32_t axisFeature : 1;
uint32_t axisBatch : 1;
uint32_t axisFeature : 1;
uint32_t axisBatch : 1;
// NOTE(review): "Kenrel" is a long-standing typo for "Kernel". It is not
// renamed here because the out-of-line Enable*KernelDividerMode definitions
// (and any other .cpp users) reference these exact field names.
uint32_t fixedKenrelDivider : 1;
uint32_t dynamicKenrelDivider : 1;
uint32_t normalize_variance : 1;
uint32_t max_with_argmax : 1;
uint32_t bilinear : 1;
uint32_t fixedKenrelDivider : 1;
uint32_t dynamicKenrelDivider : 1;
uint32_t dynamicKenrelDividerWithPadding : 1;
uint32_t position_sensitive : 1;
// Convolution capabilities.
uint32_t dilation : 1;
uint32_t depthwise_separable_opt : 1;
uint32_t transposed : 1;
uint32_t quantization : 1;
uint32_t calibration : 1;
uint32_t grouped : 1;
uint32_t dimFeature : 1;
uint32_t dimFeature : 1;
uint32_t classes : 1;
uint32_t dimFeature : 1;
uint32_t axisFeature : 1;
uint32_t axisBatch : 1;
uint32_t kernelPerInput : 1;
uint32_t oneKernel : 1;
uint32_t nearest : 1;
uint32_t bilinear : 1;
uint32_t winograd : 1;
uint32_t broadcast : 1;
// Fused convolution + eltwise capabilities (mirrors the conv flags above).
struct fused_conv_eltw_t {
uint32_t dilation : 1;
uint32_t depthwise_separable_opt : 1;
uint32_t transposed : 1;
uint32_t quantization : 1;
uint32_t calibration : 1;
uint32_t grouped : 1;
uint32_t rw_out_opt : 1;
// Machine capabilities (OpenCL subgroup extensions).
uint32_t subgroup : 1;
uint32_t subgroupShort : 1;
// The restrict_t union must stay exactly 64 bits wide so keys can be
// compared/merged as raw integers.
static_assert(sizeof(restrict_t) == sizeof(uint64_t), "problem with union");
// Bit-mask storage behind ParamsKey: each Enable* call sets bits here, and
// a kernel's key is matched against a primitive's key bit-by-bit.
typedef union DataTypesKey_t
uint32_t enableTuning;
DataTypesKey inputType;
DataTypesKey outputType;
DataTypesKey inputWeightsType;
DataTypesKey outputWeightsType;
// Layout masks: bit i corresponds to DataLayout/WeightsLayout enum value i
// (see the (1 << l) shifts in the Enable*Layout methods below).
uint32_t inputLayout;
uint32_t outputLayout;
uint32_t weightsInputLayout;
uint32_t weightsOutputLayout;
// --- Data-type capabilities -----------------------------------------------
// Mark which input/output data and weights types are supported/required.
// The non-inline setters are defined out of line (in the .cpp).
void EnableInputDataType(Datatype dt);
void EnableAllInputDataType();
void EnableOutputDataType(Datatype dt);
void EnableAllOutputDataType();
void EnableInputWeightsType(WeightsType wt);
void EnableAllInputWeightsType();
void EnableOutputWeightsType(WeightsType wt);
void EnableAllOutputWeightsType();
void EnableFP16Emulation() { key.restrict.val.FP16Emulation = 1; }
void EnableDifferentTypes() { key.restrict.val.different_types = 1; }
void EnableDifferentInputWeightsTypes() {
key.restrict.val.different_input_weights_types = 1; }
// --- Layout capabilities --------------------------------------------------
// One bit per DataLayout/WeightsLayout enum value; 0xffffffff = all layouts.
void EnableInputLayout(DataLayout l) { key.inputLayout |= (1 << l); }
void EnableAllInputLayout() { key.inputLayout = 0xffffffff; }
void EnableOutputLayout(DataLayout l) { key.outputLayout |= (1 << l); }
void EnableAllOutputLayout() { key.outputLayout = 0xffffffff; }
void EnableInputWeightsLayout(WeightsLayout l) { key.weightsInputLayout |= (1 << l); }
void EnableAllInputWeightsLayout() { key.weightsInputLayout = 0xffffffff; }
void EnableOutputWeightsLayout(WeightsLayout l) { key.weightsOutputLayout |= (1 << l); }
void EnableAllOutputWeightsLayout() { key.weightsOutputLayout = 0xffffffff; }
// --- Generic restriction / machine flags ----------------------------------
void EnableTensorOffset() { key.restrict.val.offset = 1; }
void EnableTensorPitches() { key.restrict.val.pitches = 1; }
void EnableBatching() { key.restrict.val.batching = 1; }
void EnableGradient() { key.restrict.val.gradient = 1; }
void EnableSubGroup() { key.machineInfo.val.subgroup = 1; }
void EnableSubGroupShort() { key.machineInfo.val.subgroupShort = 1; }
void EnableNonBiasTerm() { key.restrict.val.nonBias = 1; }
void EnableBiasPerFeature() { key.restrict.val.biasPerFeatureMap = 1; }
void EnableBiasPerOutput() { key.restrict.val.biasPerOutput = 1; }
void EnableActivationAdditionalParamsAsInput() { key.restrict.val.activationAdditionalParamsAsInput = 1; }
void EnableMomentum() { key.restrict.val.momentum = 1; }
// --- Per-primitive capabilities -------------------------------------------
// Out-of-line setters translate primitive-specific enums into the dedicated
// bitfield flags; the inline ones set a single dedicated flag directly.
void EnableLRNMode(LRNMode m);
void EnableLookUpTableAxis(LookUpTableAxis m);
void EnableNormalizeMode(NormalizeMode m);
void EnableMVNMode(MVNMode m);
void EnableMVNNormalizeVariance();
void EnableLRNKernelDividerMode(KernelDividerMode m);
void EnablePoolKernelDividerMode(KernelDividerMode m);
void EnablePoolType(PoolType t);
void EnablePoolRemainder(PoolRemainder r);
void EnablePositionSensitivePooling() { key.restrict.val.dedicated.pooling.position_sensitive = 1; }
// Convolution capabilities.
void EnableSplitSupport() { key.restrict.val.dedicated.conv.split = 1; }
void EnableDilation() { key.restrict.val.dedicated.conv.dilation = 1; }
void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwise_separable_opt = 1; }
void EnableLocalConvolution() { key.restrict.val.dedicated.conv.local = 1; }
void EnableGroupedConvolution() { key.restrict.val.dedicated.conv.grouped = 1; }
void EnableTranspose() { key.restrict.val.dedicated.conv.transposed = 1; }
void EnableInt8Quantization() { key.restrict.val.dedicated.conv.quantization = 1; }
void EnableOutputCalibration() { key.restrict.val.dedicated.conv.calibration = 1; }
// Fused convolution + eltwise capabilities (mirror the conv setters above).
void EnableFusedConvEltwSplitSupport() { key.restrict.val.dedicated.fused_conv_eltw.split = 1; }
void EnableFusedConvEltwDilation() { key.restrict.val.dedicated.fused_conv_eltw.dilation = 1; }
void EnableFusedConvEltwDepthwiseSeparableOpt() { key.restrict.val.dedicated.fused_conv_eltw.depthwise_separable_opt = 1; }
void EnableFusedConvEltwLocalConvolution() { key.restrict.val.dedicated.fused_conv_eltw.local = 1; }
void EnableFusedConvEltwGroupedConvolution() { key.restrict.val.dedicated.fused_conv_eltw.grouped = 1; }
void EnableFusedConvEltwTranspose() { key.restrict.val.dedicated.fused_conv_eltw.transposed = 1; }
void EnableFusedConvEltwInt8Quantization() { key.restrict.val.dedicated.fused_conv_eltw.quantization = 1; }
void EnableFusedConvEltwOutputCalibration() { key.restrict.val.dedicated.fused_conv_eltw.calibration = 1; }
void EnableFusedConvEltwEltwiseStride();
// Reorder / softmax / concat / upsampling / eltwise / LSTM capabilities.
void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
void EnableSoftmaxDim(SoftmaxDim d);
void EnableConcatAxis(ConcatAxis a);
void EnableUpSamplingSampleType(SampleType a);
void EnableEltwiseStride();
void EnableEltwiseBroadcast() { key.restrict.val.dedicated.eltwise.broadcast = 1; }
void EnableLSTMGEMMBias() { key.restrict.val.dedicated.lstm_gemm.bias = 1; }
void EnableLSTMGEMMHidden() { key.restrict.val.dedicated.lstm_gemm.hidden = 1; }
void EnableLSTMEltCell() { key.restrict.val.dedicated.lstm_elt.cell = 1; }
void EnableConcatKernelPerInput() { key.restrict.val.dedicated.concat.kernelPerInput = 1; }
// Opts this key out of auto-tuning (enableTuning defaults on elsewhere).
void DisableTuning() { key.enableTuning = 0; }
void EnableConcatOneKernel() { key.restrict.val.dedicated.concat.oneKernel = 1; }
void EnableArgMaxMinAxis(ArgMaxMinAxis a);
void EnableLookUpTableIndicesFormat(Datatype a);
void EnableIndexSelectAxis(IndexSelectAxis a);
void EnableFusedConvEltwiseRWOutOpt();
// --- Queries ---------------------------------------------------------------
// Support(): presumably matches this key's capability masks against the
// requirement key `k` — definition is out of line; confirm in the .cpp.
bool Support(const ParamsKey& k) const;
// NOTE(review): the bodies of TuningSupport() and
// isEnabledDifferentInputWeightsTypes() are only partially visible here;
// the missing lines are left untouched.
bool TuningSupport() const
if (key.enableTuning == 1)
bool isEnabledDifferentInputWeightsTypes() const {
return key.restrict.val.different_input_weights_types ? true : false;
// Combines two keys; definition is out of line in the .cpp.
ParamsKey Merge(const ParamsKey& k) const;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EngineInfo — capabilities and limits of the target device/driver.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Feature flags (subgroup extensions, fp16/fp64 arithmetic, image support,
// IMAD/IMMAD integer dot-product instructions).
bool bSubGroupSupport = false;
bool bSubGroupShortSupport = false;
bool bFP16Support = false;
bool bFP64Support = false;
bool bImageSupport = false;
bool bIMADSupport = false;
bool bIMMADSupport = false;
// Device limits.
uint32_t computeUnitsCount = 0;
uint64_t maxWorkGroupSize = 0;
uint64_t maxLocalMemSize = 0;
uint64_t maxImage2dWidth = 0;
uint64_t maxImage2dHeight = 0;
// Identification strings plus a cached per-device tuning document
// (rapidjson DOM) — presumably loaded from a tuning cache file; confirm
// where deviceCache is populated.
std::string deviceId = "";
std::string driverVersion = "";
std::string hostVersion = "";
std::shared_ptr<rapidjson::Document> deviceCache;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Params — common base for all kernel parameter structs.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the primitive kind this parameter set describes.
KernelType GetType() const { return kType; }
virtual ParamsKey GetParamsKey() const;
// kt: primitive kind; id: layer identifier stored in layerID.
Params(KernelType kt, const std::string& id) : kType(kt), layerID(id) {}
// Capabilities of the device the kernel will run on.
EngineInfo engineInfo;
virtual std::string to_string() const;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// base_activation_params — activation function fused into a kernel plus its
// two scalar coefficients m and n (meaning depends on the function).
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct base_activation_params
// NONE means no activation is applied.
ActivationFunction function = ActivationFunction::NONE;
base_activation_params() = default;
// m, n: coefficients forwarded to the activation (e.g. slope/bounds —
// exact semantics depend on `function`).
base_activation_params(const float m, const float n) : m(m), n(n) {}
virtual std::string to_string() const;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// base_params — parameters shared by every primitive.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct base_params : public Params
virtual ~base_params() {}
// Fused activation applied to the primitive's output.
base_activation_params activation;
// Input tensors; the constructor sizes this to a single default input.
MultiDataTensor inputs;
// presumably marks a backward/gradient-computing variant — confirm with
// callers of EnableGradient()/ParamsKey.
bool gradient = false;
virtual std::string to_string() const;
virtual ParamsKey GetParamsKey() const;
// Layer id starts empty; callers fill it in later.
base_params(KernelType kt) : Params(kt, ""), inputs(1){}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Auto tuner parameters
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class KernelRunnerInterface;
// Path of the tuning-cache file and the runner used to time candidate
// kernels during auto-tuning.
std::string cacheFilePath;
std::shared_ptr<KernelRunnerInterface> runner;
// Tuning is off by default: no cache file, no runner.
TuningParams() : mode(TuningMode::TUNING_DISABLED), cacheFilePath(""), runner(nullptr) {}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// optional_params — per-invocation options and permissions granted to the
// kernel by the graph compiler.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct optional_params
virtual ~optional_params() {}
// Returns the primitive kind these options apply to.
KernelType GetType() const { return kType; }
// Layouts the surrounding graph can supply/accept for this primitive.
std::vector<DataLayout> inputLayouts;
std::vector<DataLayout> outputLayouts;
bool meaningfulKernelsNames = false; // use layer name instead of internal kernel name
bool allowStaticInputReordering = true; // allow kernel to provide a kernel which reorder static data like weights/bias/tables...
bool allowInputReordering = false; // allow kernel to ask graph compiler to reorder the input data before executing it
bool allowOutputReordering = false; // allow kernel to ask graph compiler to reorder the output data before executing the next kernel
// Auto-tuner configuration (disabled by default — see TuningParams).
TuningParams tuningParams;
virtual ParamsKey GetSupportedKey() const;
optional_params(KernelType kt) : kType(kt) {}