Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / kernel_selector_params.h
1 /*
2 // Copyright (c) 2016-2018 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #pragma once
18
19 #include <string>
20 #include <memory>
21 #include <cstddef>
22 #include "common_types.h"
23 #include "tensor_type.h"
24 #include "document.h"
25
26 namespace kernel_selector
27 {
28     using DataTensor = Tensor::DataTensor;
29     using WeightsTensor = Tensor::WeightsTensor;
30     using DataLayout = Tensor::DataLayout;
31     using WeightsLayout = Tensor::WeightsLayout;
32     using MultiDataTensor = std::vector<DataTensor>;
33     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
34     // ParamsKey
35     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
36     class ParamsKey
37     {
38     public:
39         ParamsKey()
40         {
41             key.restrict.raw = 0;
42             key.enableTuning = 1;
43             key.machineInfo.raw = 0;
44             key.inputType.raw = 0;
45             key.outputType.raw = 0;
46             key.inputWeightsType.raw = 0;
47             key.outputWeightsType.raw = 0;
48             key.inputLayout = 0;
49             key.outputLayout = 0;
50             key.weightsInputLayout = 0;
51             key.weightsOutputLayout = 0;
52         }
53
54         struct Key
55         {
56             union restrict_t
57             {
58                 struct val_t
59                 {
60                     uint32_t different_types : 1;
61                     uint32_t different_input_weights_types : 1;
62                     uint32_t offset : 1;
63                     uint32_t pitches : 1;
64                     uint32_t batching : 1;
65                     uint32_t biasPerFeatureMap : 1;
66                     uint32_t biasPerOutput : 1;
67                     uint32_t nonBias : 1;
68                     uint32_t activationAdditionalParamsAsInput : 1;
69                     uint32_t FP16Emulation : 1;
70                     uint32_t gradient : 1;
71                     uint32_t momentum : 1;
72
73                     union dedicated_t
74                     {
75                         struct lookt_t
76                         {
77                             uint32_t axisX : 1;
78                             uint32_t axisY : 1;
79                             uint32_t axisFeature : 1;
80                             uint32_t axisBatch : 1;
81                             uint32_t axisXYF : 1;
82                             uint32_t indicesF32 : 1;
83                             uint32_t indicesOther : 1;
84                         } lookt;
85                                                 struct argm_t
86                                                 {
87                                                         uint32_t axisX : 1;
88                                                         uint32_t axisY : 1;
89                                                         uint32_t axisFeature : 1;
90                                                         uint32_t axisBatch : 1;
91                                                         uint32_t axisXYF : 1;
92                                                 } argm;
93                         struct idxsel_t
94                         {
95                             uint32_t axisX : 1;
96                             uint32_t axisY : 1;
97                             uint32_t axisFeature : 1;
98                             uint32_t axisBatch : 1;
99                         } idxsel;
100                         struct norm_t
101                         {
102                             uint32_t across : 1;
103                             uint32_t within : 1;
104                             uint32_t fixedKenrelDivider : 1;
105                             uint32_t dynamicKenrelDivider : 1;
106                         } norm;
107                         struct mvn_t
108                         {
109                             uint32_t across : 1;
110                             uint32_t within : 1;
111                             uint32_t normalize_variance : 1;
112                         } mvn;
113                         struct pooling_t
114                         {
115                             uint32_t max : 1;
116                             uint32_t avg : 1;
117                             uint32_t floor : 1;
118                             uint32_t max_with_argmax : 1;
119                             uint32_t ceil : 1;
120                             uint32_t bilinear : 1;
121                             uint32_t fixedKenrelDivider : 1;
122                             uint32_t dynamicKenrelDivider : 1;
123                             uint32_t dynamicKenrelDividerWithPadding : 1;
124                             uint32_t position_sensitive : 1;
125                         } pooling;
126                         struct conv_t
127                         {
128                             uint32_t split : 1;
129                             uint32_t dilation : 1;
130                             uint32_t depthwise_separable_opt : 1;
131                             uint32_t transposed : 1;
132                             uint32_t quantization : 1;
133                             uint32_t calibration : 1;
134                             uint32_t local : 1;
135                             uint32_t grouped : 1;
136                         } conv;
137                         struct fc_t {} fc;
138                         struct softmax_t
139                         {
140                             uint32_t dimX : 1;
141                             uint32_t dimY : 1;
142                             uint32_t dimFeature : 1;
143                         } softmax;
144                         struct region_yolo_t
145                         {
146                             uint32_t dimX : 1;
147                             uint32_t dimY : 1;
148                             uint32_t dimFeature : 1;
149                             uint32_t coords : 1;
150                             uint32_t classes : 1;
151                             uint32_t num : 1;
152                         } region_yolo;
153                         struct reorg_yolo_t
154                         {
155                             uint32_t dimX : 1;
156                             uint32_t dimY : 1;
157                             uint32_t dimFeature : 1;
158                             uint32_t stride : 1;
159                         } reorg_yolo;
160                         struct concat_t
161                         {
162                             uint32_t axisX : 1;
163                             uint32_t axisY : 1;
164                             uint32_t axisFeature : 1;
165                             uint32_t axisBatch : 1;
166                             uint32_t kernelPerInput : 1;
167                             uint32_t oneKernel : 1;
168                         } concat;
169                         struct upsample_t
170                         {
171                             uint32_t nearest : 1;
172                             uint32_t bilinear : 1;
173                         } upsample;
174                         struct reorder_t
175                         {
176                             uint32_t winograd : 1;
177                         } reorder;
178                         struct eltwise_t
179                         {
180                             uint32_t stride : 1;
181                             uint32_t broadcast : 1;
182                         } eltwise;
183                         struct lstm_gemm_t {
184                             uint32_t bias : 1;
185                             uint32_t hidden : 1;
186                         } lstm_gemm;
187                         struct lstm_elt_t {
188                             uint32_t cell : 1;
189                         } lstm_elt;
190                         struct fused_conv_eltw_t {
191                             // conv
192                             uint32_t split : 1;
193                             uint32_t dilation : 1;
194                             uint32_t depthwise_separable_opt : 1;
195                             uint32_t transposed : 1;
196                             uint32_t quantization : 1;
197                             uint32_t calibration : 1;
198                             uint32_t local : 1;
199                             uint32_t grouped : 1;
200                             // eltw
201                             uint32_t stride : 1;
202                             // fused conv eltw
203                             uint32_t rw_out_opt : 1;
204                         } fused_conv_eltw;
205                     } dedicated;
206                 } val;
207                 uint64_t raw;
208             } restrict;
209
210             union machine_info_t
211             {
212                 struct val_t
213                 {
214                     uint32_t subgroup : 1;
215                     uint32_t subgroupShort : 1;
216                 } val;
217                 uint32_t raw;
218             } machineInfo;
219
220             static_assert(sizeof(restrict_t) == sizeof(uint64_t), "problem with union");
221
222             typedef union DataTypesKey_t
223             {
224                 struct val_t
225                 {
226                     uint32_t int8 : 1;
227                     uint32_t uint8 : 1;
228                     uint32_t int16 : 1;
229                     uint32_t uint16 : 1;
230                     uint32_t int32 : 1;
231                     uint32_t uint32 : 1;
232                     uint32_t int64 : 1;
233                     uint32_t F16 : 1;
234                     uint32_t F32 : 1;
235                 } val;
236                 uint32_t raw;
237             } DataTypesKey;
238
239             uint32_t enableTuning;
240             DataTypesKey inputType;
241             DataTypesKey outputType;
242             DataTypesKey inputWeightsType;
243             DataTypesKey outputWeightsType;
244             uint32_t inputLayout;
245             uint32_t outputLayout;
246             uint32_t weightsInputLayout;
247             uint32_t weightsOutputLayout;
248         };
249
250         void EnableInputDataType(Datatype dt);
251         void EnableAllInputDataType();
252         void EnableOutputDataType(Datatype dt);
253         void EnableAllOutputDataType();
254         void EnableInputWeightsType(WeightsType wt);
255         void EnableAllInputWeightsType();
256         void EnableOutputWeightsType(WeightsType wt);
257         void EnableAllOutputWeightsType();
258         void EnableFP16Emulation() { key.restrict.val.FP16Emulation = 1; }
259         void EnableDifferentTypes() { key.restrict.val.different_types = 1; }
260         void EnableDifferentInputWeightsTypes() {
261             key.restrict.val.different_input_weights_types = 1; }
262         void EnableInputLayout(DataLayout l) { key.inputLayout |= (1 << l); }
263         void EnableAllInputLayout() { key.inputLayout = 0xffffffff; }
264         void EnableOutputLayout(DataLayout l) { key.outputLayout |= (1 << l); }
265         void EnableAllOutputLayout() { key.outputLayout = 0xffffffff; }
266         void EnableInputWeightsLayout(WeightsLayout l) { key.weightsInputLayout |= (1 << l); }
267         void EnableAllInputWeightsLayout() { key.weightsInputLayout = 0xffffffff; }
268         void EnableOutputWeightsLayout(WeightsLayout l) { key.weightsOutputLayout |= (1 << l); }
269         void EnableAllOutputWeightsLayout() { key.weightsOutputLayout = 0xffffffff; }
270         void EnableTensorOffset() { key.restrict.val.offset = 1; }
271         void EnableTensorPitches() { key.restrict.val.pitches = 1; }
272         void EnableBatching() { key.restrict.val.batching = 1; }
273         void EnableGradient() { key.restrict.val.gradient = 1; }
274         void EnableSubGroup() { key.machineInfo.val.subgroup = 1; }
275         void EnableSubGroupShort() { key.machineInfo.val.subgroupShort = 1; }
276         void EnableNonBiasTerm() { key.restrict.val.nonBias = 1; }
277         void EnableBiasPerFeature() { key.restrict.val.biasPerFeatureMap = 1; }
278         void EnableBiasPerOutput() { key.restrict.val.biasPerOutput = 1; }
279         void EnableActivationAdditionalParamsAsInput() { key.restrict.val.activationAdditionalParamsAsInput = 1; }
280         void EnableMomentum() { key.restrict.val.momentum = 1; }
281         void EnableLRNMode(LRNMode m);
282         void EnableLookUpTableAxis(LookUpTableAxis m);
283         void EnableNormalizeMode(NormalizeMode m);
284         void EnableMVNMode(MVNMode m);
285         void EnableMVNNormalizeVariance();
286         void EnableLRNKernelDividerMode(KernelDividerMode m);
287         void EnablePoolKernelDividerMode(KernelDividerMode m);
288         void EnablePoolType(PoolType t);
289         void EnablePoolRemainder(PoolRemainder r);
290         void EnablePositionSensitivePooling() { key.restrict.val.dedicated.pooling.position_sensitive = 1; }
291         void EnableSplitSupport() { key.restrict.val.dedicated.conv.split = 1; }
292         void EnableDilation() { key.restrict.val.dedicated.conv.dilation = 1; }
293         void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwise_separable_opt = 1; }
294         void EnableLocalConvolution() { key.restrict.val.dedicated.conv.local = 1; }
295         void EnableGroupedConvolution() { key.restrict.val.dedicated.conv.grouped = 1; }
296         void EnableTranspose() { key.restrict.val.dedicated.conv.transposed = 1; }
297         void EnableInt8Quantization() { key.restrict.val.dedicated.conv.quantization = 1; }
298         void EnableOutputCalibration() { key.restrict.val.dedicated.conv.calibration = 1; }
299
300         void EnableFusedConvEltwSplitSupport() { key.restrict.val.dedicated.fused_conv_eltw.split = 1; }
301         void EnableFusedConvEltwDilation() { key.restrict.val.dedicated.fused_conv_eltw.dilation = 1; }
302         void EnableFusedConvEltwDepthwiseSeparableOpt() { key.restrict.val.dedicated.fused_conv_eltw.depthwise_separable_opt = 1; }
303         void EnableFusedConvEltwLocalConvolution() { key.restrict.val.dedicated.fused_conv_eltw.local = 1; }
304         void EnableFusedConvEltwGroupedConvolution() { key.restrict.val.dedicated.fused_conv_eltw.grouped = 1; }
305         void EnableFusedConvEltwTranspose() { key.restrict.val.dedicated.fused_conv_eltw.transposed = 1; }
306         void EnableFusedConvEltwInt8Quantization() { key.restrict.val.dedicated.fused_conv_eltw.quantization = 1; }
307         void EnableFusedConvEltwOutputCalibration() { key.restrict.val.dedicated.fused_conv_eltw.calibration = 1; }
308         void EnableFusedConvEltwEltwiseStride();
309
310         void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
311         void EnableSoftmaxDim(SoftmaxDim d);
312         void EnableConcatAxis(ConcatAxis a);
313         void EnableUpSamplingSampleType(SampleType a);
314         void EnableEltwiseStride();
315         void EnableEltwiseBroadcast() { key.restrict.val.dedicated.eltwise.broadcast = 1; }
316         void EnableLSTMGEMMBias() { key.restrict.val.dedicated.lstm_gemm.bias = 1; }
317         void EnableLSTMGEMMHidden() { key.restrict.val.dedicated.lstm_gemm.hidden = 1; }
318         void EnableLSTMEltCell() { key.restrict.val.dedicated.lstm_elt.cell = 1; }
319         void EnableConcatKernelPerInput() { key.restrict.val.dedicated.concat.kernelPerInput = 1; }
320         void DisableTuning() { key.enableTuning = 0; }
321         void EnableConcatOneKernel() { key.restrict.val.dedicated.concat.oneKernel = 1; }
322         void EnableArgMaxMinAxis(ArgMaxMinAxis a);
323         void EnableLookUpTableIndicesFormat(Datatype a);
324         void EnableIndexSelectAxis(IndexSelectAxis a);
325         void EnableFusedConvEltwiseRWOutOpt();
326         bool Support(const ParamsKey& k) const;
327         bool TuningSupport() const
328         {
329             if (key.enableTuning == 1)
330                 return true;
331             return false;
332         }
333         bool isEnabledDifferentInputWeightsTypes() const {
334             return key.restrict.val.different_input_weights_types ? true : false;
335         }
336         ParamsKey Merge(const ParamsKey& k) const;
337
338     private:
339         Key key;
340     };
341
342     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
343     // EngineInfo
344     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
345     struct EngineInfo
346     {
347         bool bSubGroupSupport = false;
348         bool bSubGroupShortSupport = false;
349         bool bFP16Support = false;
350         bool bFP64Support = false;
351         bool bImageSupport = false;
352         bool bIMADSupport = false;
353         bool bIMMADSupport = false;
354         uint32_t computeUnitsCount = 0;
355         uint64_t maxWorkGroupSize = 0;
356         uint64_t maxLocalMemSize = 0;
357         uint64_t maxImage2dWidth = 0;
358         uint64_t maxImage2dHeight = 0;
359         std::string deviceId = "";
360         std::string driverVersion = "";
361         std::string hostVersion = "";
362         std::shared_ptr<rapidjson::Document> deviceCache;
363     };
364
365     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
366     // Params
367     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
368     struct Params
369     {
370         virtual ~Params() {}
371
372         KernelType GetType() const { return kType; }
373         virtual ParamsKey GetParamsKey() const;
374
375     protected:
376         Params(KernelType kt, const std::string& id) : kType(kt), layerID(id) {}
377         KernelType kType;
378
379     public:
380         std::string layerID;
381         EngineInfo engineInfo;
382
383         virtual std::string to_string() const;
384     };
385
386     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
387     // base_activation_params
388     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
389     struct base_activation_params
390     {
391         ActivationFunction  function = ActivationFunction::NONE;
392         float m = 1.f;
393         float n = 0.f;
394
395         base_activation_params() = default;
396         base_activation_params(const float m, const float n) : m(m), n(n) {}
397
398         virtual std::string to_string() const;
399     };
400
401     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
402     // base_params
403     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
404     struct base_params : public Params
405     {
406         virtual ~base_params() {}
407
408         base_activation_params activation;
409         MultiDataTensor        inputs;
410         DataTensor             output;
411         bool                   gradient = false;
412
413         virtual std::string to_string() const;
414         virtual ParamsKey GetParamsKey() const;
415     protected:
416
417         base_params(KernelType kt) : Params(kt, ""), inputs(1){}
418     };
419
420     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
421     // Auto tuner parameters
422     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
423     class KernelRunnerInterface;
424     struct TuningParams
425     {
426         TuningMode mode;
427         std::string cacheFilePath;
428         std::shared_ptr<KernelRunnerInterface> runner;
429
430         TuningParams() : mode(TuningMode::TUNING_DISABLED), cacheFilePath(""), runner(nullptr) {}
431     };
432
433     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
434     // optional_params
435     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
436     struct optional_params
437     {
438         virtual ~optional_params() {}
439
440         KernelType GetType() const { return kType; }
441
442         std::vector<DataLayout> inputLayouts;
443         std::vector<DataLayout> outputLayouts;
444
445         bool meaningfulKernelsNames     = false;    // use layer name instead of internal kernel name
446         bool allowStaticInputReordering = true;     // allow kernel to provide a kernel which reorder static data like weights/bias/tables...
447         bool allowInputReordering       = false;    // allow kernel to ask graph compiler to reorder the input data before executing its
448         bool allowOutputReordering      = false;    // allow kernel to ask graph compiler to reorder the output data before executing the next kernel
449
450         TuningParams tuningParams;
451
452         virtual ParamsKey GetSupportedKey() const;
453     protected:
454         optional_params(KernelType kt) : kType(kt) {}
455         KernelType kType;
456     };
457 }