2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "fully_connected_kernel_base.h"
18 #include "kernel_selector_utils.h"
19 #include "common_tools.h"
21 namespace kernel_selector
// Builds the JIT constants for a fully connected kernel.
// Starts from the constants produced by WeightBiasKernelBase::GetJitConstants(params)
// and adds INPUT0_ELEMENTS_COUNT and QUANTIZATION_TERM, plus the quantization
// factors (W_QF, I_QF, O_QF, CALIBRATION_TERM) that depend on params.int8_quantization
// and params.output_calibration.
// The DispatchData argument is unnamed and not used by this implementation.
23 JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_params& params, const FullyConnectedKernelBase::DispatchData&) const
25 JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
26 const auto& input = params.inputs[0];
// Logical size of input 0 divided by its batch dimension value.
27 const auto x_size = input.LogicalSize() / input.Batch().v;
29 jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
30 jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization));
// Quantization factors are only relevant for int8 quantization.
32 if (params.int8_quantization)
// Only element [0] of weights_quantization_factors is passed here.
34 jit.AddConstants({ MakeJitConstant("W_QF", params.weights_quantization_factors[0]) });
35 jit.AddConstants({ MakeJitConstant("I_QF",params.input_quantization_factor) });
// With output calibration, O_QF comes from output_calibration_factors[0] ...
37 if (params.output_calibration)
39 jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
40 jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
// ... and here O_QF comes from output_quantization_factor.
// NOTE(review): presumably this is the else-branch of the calibration check,
// so O_QF is defined exactly once - confirm against the full file.
44 jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
// Fills a DispatchData with default work sizes for a fully connected kernel.
// The global work size is one-dimensional (gws0 = logical size of the output,
// gws1 = gws2 = 1); the local work size along dimension 0 is searched starting
// from gws0 clamped to [1, 32], and lws1 = lws2 = 1.
// The int argument is unnamed and not used by this implementation.
50 FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params, int) const
52 DispatchData dispatchData;
// Flag whether input 0 uses the F16 data type.
53 dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
55 // Determine global work sizes.
56 dispatchData.gws0 = params.output.LogicalSize();
57 dispatchData.gws1 = dispatchData.gws2 = 1;
59 // Find the largest positive local work size (at most 32) that divides the global work size.
// The max(..., 1) keeps lws0 non-zero, so the modulo in the loop condition is well defined.
60 dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
// Loop while lws0 does not evenly divide gws0.
// NOTE(review): the loop body is presumably a decrement of lws0 - confirm against the full file.
61 while (dispatchData.gws0 % dispatchData.lws0 != 0)
65 dispatchData.lws1 = dispatchData.lws2 = 1;
// Common path for building the KernelsData of a fully connected kernel.
//   params / options - generic parameters; cast (unchecked static_cast) to the
//                      fully connected parameter types below.
//   dl               - input data layout the kernel is being built for.
//   wl               - weights layouts, forwarded to UpdateWeightsParams.
//   estimated_time   - stored in the resulting KernelData as estimatedTime.
//   exeMode          - forwarded to FillCLKernelData.
//   autoTuneIndex    - forwarded to SetDefault and stored in the resulting KernelData.
70 KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const std::string exeMode, int autoTuneIndex) const
// Validation failure is one of the rejection conditions checked up front.
72 if (!Validate(params, options) ||
78 const auto& orgParams = static_cast<const fully_connected_params&>(params);
79 const auto& orgOptParams = static_cast<const fully_connected_optional_params&>(options);
// The input is "proper" when input 0 already has the requested layout.
81 bool bProperInput = orgParams.inputs[0].GetLayout() == dl;
// Otherwise, if the input's pitches do not differ from its logical dims,
// two further layout pairs are checked: fyxb when fb is requested, and bfyx when bf is requested.
// NOTE(review): presumably the result is assigned to bProperInput - confirm.
82 if (!bProperInput && !orgParams.inputs[0].PitchesDifferFromLogicalDims())
85 (dl == DataLayout::fb && orgParams.inputs[0].GetLayout() == DataLayout::fyxb) ||
86 (dl == DataLayout::bf && orgParams.inputs[0].GetLayout() == DataLayout::bfyx);
// An input that is not proper is still supported when input reordering is allowed.
89 const bool bSupportedInput = orgOptParams.allowInputReordering || bProperInput;
// newParams refers to the params object held by kd, so the edits below are stored in kd.
96 KernelData kd = KernelData::Default<fully_connected_params>(params);
97 fully_connected_params& newParams = *static_cast<fully_connected_params*>(kd.params.get());
// Convert input 0 to the requested layout (ignoring padding) and flag that the input is reordered.
// NOTE(review): presumably guarded by !bProperInput - confirm.
101 newParams.inputs[0] = newParams.inputs[0].TransformIgnorePadding(dl);
102 kd.reorderInput = true;
// Update the weights parameters; kd.weightsReorderParams is the last argument of the call.
105 bool succeed = UpdateWeightsParams(
109 kd.weightsReorderParams);
// Exactly one kernel is produced.
116 kd.kernels.resize(1);
118 auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
// Dispatch sizes and JIT constants are computed from the (possibly modified) newParams.
120 const DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
121 auto cldnn_jit = GetJitConstants(newParams, runInfo);
122 std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
124 auto& kernel = kd.kernels[0];
// One of the flags passed here is whether the original params carry a non-empty bias;
// the quantization and calibration flags are taken from newParams.
125 FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, exeMode, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
// Record the caller-supplied estimate and auto-tune index on the result.
127 kd.estimatedTime = estimated_time;
128 kd.autoTuneIndex = autoTuneIndex;
// Returns the entry of autoTuneOptions at autoTuneIndex.
// NOTE(review): the value returned for an out-of-range index is not shown here - confirm against the full file.
132 std::string FullyConnectedKernelBase::GetAutoTuneOptions(int autoTuneIndex) const
// Bounds check: the index must be in [0, autoTuneOptions.size()).
134 if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
136 return autoTuneOptions[autoTuneIndex];
// Builds the kernels data for a specific auto-tune index.
// Forwards all arguments to GetCommonKernelsData, passing the result of
// GetAutoTuneOptions(autoTuneIndex) as the exeMode argument.
142 KernelsData FullyConnectedKernelBase::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const int autoTuneIndex) const
144 return GetCommonKernelsData(params, options, dl, wl, estimated_time, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex);