inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "fully_connected_kernel_base.h"
  18 #include "kernel_selector_utils.h"
  19 #include "common_tools.h"
  20
  21 namespace kernel_selector
  22 {
  23     JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_params& params, const FullyConnectedKernelBase::DispatchData&) const
  24     {
  25         JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
  26         const auto& input = params.inputs[0];
  27         const auto x_size = input.LogicalSize() / input.Batch().v;
  28
  29         jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
  30         jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization));
  31
  32         if (params.int8_quantization)
  33         {
  34             jit.AddConstants({ MakeJitConstant("W_QF", params.weights_quantization_factors[0]) });
  35             jit.AddConstants({ MakeJitConstant("I_QF",params.input_quantization_factor) });
  36
  37             if (params.output_calibration)
  38             {
  39                 jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
  40                 jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
  41
  42             }
  43             else
  44                 jit.AddConstants({ MakeJitConstant("O_QF",       params.output_quantization_factor) });
  45         }
  46
  47         return jit;
  48     }
  49
  50     FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params, int) const
  51     {
  52         DispatchData dispatchData;
  53         dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
  54
  55         // Determine global work sizes.
  56         dispatchData.gws0 = params.output.LogicalSize();
  57         dispatchData.gws1 = dispatchData.gws2 = 1;
  58
  59         // Find largest positive local work size that is divider for global work size.
  60         dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
  61         while (dispatchData.gws0 % dispatchData.lws0 != 0)
  62         {
  63             --dispatchData.lws0;
  64         }
  65         dispatchData.lws1 = dispatchData.lws2 = 1;
  66
  67         return dispatchData;
  68     }
  69
  70     KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const std::string exeMode, int autoTuneIndex) const
  71     {
  72         if (!Validate(params, options) ||
  73             wl.empty())
  74         {
  75             return KernelsData();
  76         }
  77
  78         const auto& orgParams = static_cast<const fully_connected_params&>(params);
  79         const auto& orgOptParams = static_cast<const fully_connected_optional_params&>(options);
  80
  81         bool bProperInput = orgParams.inputs[0].GetLayout() == dl;
  82         if (!bProperInput && !orgParams.inputs[0].PitchesDifferFromLogicalDims())
  83         {
  84             bProperInput =
  85                 (dl == DataLayout::fb && orgParams.inputs[0].GetLayout() == DataLayout::fyxb) ||
  86                 (dl == DataLayout::bf && orgParams.inputs[0].GetLayout() == DataLayout::bfyx);
  87         }
  88
  89         const bool bSupportedInput = orgOptParams.allowInputReordering || bProperInput;
  90
  91         if (!bSupportedInput)
  92         {
  93             return KernelsData();
  94         }
  95
  96         KernelData kd = KernelData::Default<fully_connected_params>(params);
  97         fully_connected_params& newParams = *static_cast<fully_connected_params*>(kd.params.get());
  98
  99         if (!bProperInput)
 100         {
 101             newParams.inputs[0] = newParams.inputs[0].TransformIgnorePadding(dl);
 102             kd.reorderInput = true;
 103         }
 104
 105         bool succeed = UpdateWeightsParams(
 106             newParams,
 107             options,
 108             wl,
 109             kd.weightsReorderParams);
 110
 111         if (!succeed)
 112         {
 113             return{};
 114         }
 115
 116         kd.kernels.resize(1);
 117
 118         auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
 119
 120         const DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
 121         auto cldnn_jit = GetJitConstants(newParams, runInfo);
 122         std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
 123
 124         auto& kernel = kd.kernels[0];
 125         FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, exeMode, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
 126
 127         kd.estimatedTime = estimated_time;
 128         kd.autoTuneIndex = autoTuneIndex;
 129         return{ kd };
 130     }
 131
 132     std::string FullyConnectedKernelBase::GetAutoTuneOptions(int autoTuneIndex) const
 133     {
 134         if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
 135         {
 136             return autoTuneOptions[autoTuneIndex];
 137         }
 138
 139         return DEFAULT;
 140 }
 141
 142     KernelsData FullyConnectedKernelBase::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const int autoTuneIndex) const
 143     {
 144         return GetCommonKernelsData(params, options, dl, wl, estimated_time, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex);
 145     }
 146
 147 }