Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / fully_connected / fully_connected_kernel_base.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "fully_connected_kernel_base.h"
18 #include "kernel_selector_utils.h"
19 #include "common_tools.h"
20
21 namespace kernel_selector 
22 {
23     JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_params& params, const FullyConnectedKernelBase::DispatchData&) const
24     {
25         JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
26         const auto& input = params.inputs[0];
27         const auto x_size = input.LogicalSize() / input.Batch().v;
28
29         jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
30         jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization));
31
32         if (params.int8_quantization)
33         {
34             jit.AddConstants({ MakeJitConstant("W_QF", params.weights_quantization_factors[0]) });
35             jit.AddConstants({ MakeJitConstant("I_QF",params.input_quantization_factor) });
36
37             if (params.output_calibration)
38             {
39                 jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
40                 jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
41
42             }
43             else
44                 jit.AddConstants({ MakeJitConstant("O_QF",       params.output_quantization_factor) });
45         }
46
47         return jit;
48     }
49
50     FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params, int) const
51     {
52         DispatchData dispatchData;
53         dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
54
55         // Determine global work sizes.
56         dispatchData.gws0 = params.output.LogicalSize();
57         dispatchData.gws1 = dispatchData.gws2 = 1;
58
59         // Find largest positive local work size that is divider for global work size.
60         dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
61         while (dispatchData.gws0 % dispatchData.lws0 != 0)
62         {
63             --dispatchData.lws0;
64         }
65         dispatchData.lws1 = dispatchData.lws2 = 1;
66
67         return dispatchData;
68     }
69
70     KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const std::string exeMode, int autoTuneIndex) const
71     {
72         if (!Validate(params, options) ||
73             wl.empty())
74         {
75             return KernelsData();
76         }
77
78         const auto& orgParams = static_cast<const fully_connected_params&>(params);
79         const auto& orgOptParams = static_cast<const fully_connected_optional_params&>(options);
80
81         bool bProperInput = orgParams.inputs[0].GetLayout() == dl;
82         if (!bProperInput && !orgParams.inputs[0].PitchesDifferFromLogicalDims())
83         {
84             bProperInput =
85                 (dl == DataLayout::fb && orgParams.inputs[0].GetLayout() == DataLayout::fyxb) ||
86                 (dl == DataLayout::bf && orgParams.inputs[0].GetLayout() == DataLayout::bfyx);
87         }
88
89         const bool bSupportedInput = orgOptParams.allowInputReordering || bProperInput;
90
91         if (!bSupportedInput)
92         {
93             return KernelsData();
94         }
95
96         KernelData kd = KernelData::Default<fully_connected_params>(params);
97         fully_connected_params& newParams = *static_cast<fully_connected_params*>(kd.params.get());
98
99         if (!bProperInput)
100         {
101             newParams.inputs[0] = newParams.inputs[0].TransformIgnorePadding(dl);
102             kd.reorderInput = true;
103         }
104
105         bool succeed = UpdateWeightsParams(
106             newParams,
107             options,
108             wl,
109             kd.weightsReorderParams);
110
111         if (!succeed)
112         {
113             return{};
114         }
115
116         kd.kernels.resize(1);
117         
118         auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
119
120         const DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
121         auto cldnn_jit = GetJitConstants(newParams, runInfo);
122         std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
123
124         auto& kernel = kd.kernels[0];
125         FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, exeMode, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
126
127         kd.estimatedTime = estimated_time;
128         kd.autoTuneIndex = autoTuneIndex;
129         return{ kd };
130     }
131
132     std::string FullyConnectedKernelBase::GetAutoTuneOptions(int autoTuneIndex) const
133     {
134         if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
135         {
136             return autoTuneOptions[autoTuneIndex];
137         }
138
139         return DEFAULT;
140 }
141
142     KernelsData FullyConnectedKernelBase::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const int autoTuneIndex) const
143     {
144         return GetCommonKernelsData(params, options, dl, wl, estimated_time, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex);
145     }
146
147 }