inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp

   1 // Copyright (c) 2016 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16 #include "fully_connected_kernel_base.h"
  17 #include "kernel_selector_utils.h"
  18 #include "common_tools.h"
  19 #include <string>
  20 #include <vector>
  21 #include <algorithm>
  22
  23 namespace kernel_selector {
  24 JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_params& params,
  25                                                        const FullyConnectedKernelBase::DispatchData&) const {
  26     JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
  27     const auto& input = params.inputs[0];
  28     const auto x_size = input.LogicalSize() / input.Batch().v;
  29
  30     jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size));
  31     jit.AddConstant(MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization));
  32
  33     if (params.int8_quantization) {
  34         jit.AddConstants({MakeJitConstant("W_QF", params.weights_quantization_factors[0])});
  35         jit.AddConstants({MakeJitConstant("I_QF", params.input_quantization_factor)});
  36
  37         if (params.output_calibration) {
  38             jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
  39             jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
  40
  41         } else {
  42             jit.AddConstants({MakeJitConstant("O_QF", params.output_quantization_factor)});
  43         }
  44     }
  45
  46     return jit;
  47 }
  48
  49 FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params,
  50                                                                             int) const {
  51     DispatchData dispatchData;
  52     dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
  53
  54     // Determine global work sizes.
  55     dispatchData.gws0 = params.output.LogicalSize();
  56     dispatchData.gws1 = dispatchData.gws2 = 1;
  57
  58     // Find largest positive local work size that is divider for global work size.
  59     dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
  60     while (dispatchData.gws0 % dispatchData.lws0 != 0) {
  61         --dispatchData.lws0;
  62     }
  63     dispatchData.lws1 = dispatchData.lws2 = 1;
  64
  65     return dispatchData;
  66 }
  67
  68 KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params& params,
  69                                                            const optional_params& options,
  70                                                            DataLayout dl,
  71                                                            std::vector<WeightsLayout> wl,
  72                                                            float estimated_time,
  73                                                            const std::string exeMode,
  74                                                            int autoTuneIndex) const {
  75     if (!Validate(params, options) || wl.empty()) {
  76         return KernelsData();
  77     }
  78
  79     const auto& orgParams = static_cast<const fully_connected_params&>(params);
  80     const auto& orgOptParams = static_cast<const fully_connected_optional_params&>(options);
  81
  82     bool bProperInput = orgParams.inputs[0].GetLayout() == dl;
  83     if (!bProperInput && !orgParams.inputs[0].PitchesDifferFromLogicalDims()) {
  84         bProperInput = (dl == DataLayout::fb && orgParams.inputs[0].GetLayout() == DataLayout::fyxb) ||
  85                        (dl == DataLayout::bf && orgParams.inputs[0].GetLayout() == DataLayout::bfyx);
  86     }
  87
  88     const bool bSupportedInput = orgOptParams.allowInputReordering || bProperInput;
  89
  90     if (!bSupportedInput) {
  91         return KernelsData();
  92     }
  93
  94     KernelData kd = KernelData::Default<fully_connected_params>(params);
  95     fully_connected_params& newParams = *static_cast<fully_connected_params*>(kd.params.get());
  96
  97     if (!bProperInput) {
  98         newParams.inputs[0] = newParams.inputs[0].TransformIgnorePadding(dl);
  99         kd.reorderInput = true;
 100     }
 101
 102     bool succeed = UpdateWeightsParams(newParams,
 103                                        options,
 104                                        wl,
 105                                        kd.weightsReorderParams,
 106                                        GetSupportedKey());
 107
 108     if (!succeed) {
 109         return {};
 110     }
 111
 112     kd.kernels.resize(1);
 113
 114     auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
 115
 116     const DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
 117     auto cldnn_jit = GetJitConstants(newParams, runInfo);
 118     std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
 119
 120     auto& kernel = kd.kernels[0];
 121     FillCLKernelData(kernel,
 122                      runInfo,
 123                      params.engineInfo,
 124                      kernelName,
 125                      jit,
 126                      entry_point,
 127                      exeMode,
 128                      true,
 129                      !orgParams.bias.empty(),
 130                      1,
 131                      newParams.int8_quantization,
 132                      newParams.output_calibration);
 133
 134     // TODO Pass estimated time only through DispatchData
 135     kd.estimatedTime = estimated_time;
 136     kd.autoTuneIndex = autoTuneIndex;
 137     return {kd};
 138 }
 139
 140 std::string FullyConnectedKernelBase::GetAutoTuneOptions(int autoTuneIndex) const {
 141     if ((autoTuneIndex >= 0) && (autoTuneIndex < static_cast<int>(autoTuneOptions.size()))) {
 142         return autoTuneOptions[autoTuneIndex];
 143     }
 144
 145     return DEFAULT;
 146 }
 147
 148 KernelsData FullyConnectedKernelBase::GetTunedKernelsDataByIndex(const Params& params,
 149                                                                  const optional_params& options,
 150                                                                  DataLayout dl,
 151                                                                  std::vector<WeightsLayout> wl,
 152                                                                  float estimated_time,
 153                                                                  const int autoTuneIndex) const {
 154     return GetCommonKernelsData(params,
 155                                 options,
 156                                 dl,
 157                                 wl,
 158                                 estimated_time,
 159                                 GetAutoTuneOptions(autoTuneIndex),
 160                                 autoTuneIndex);
 161 }
 162
 163 }  // namespace kernel_selector