// Copyright (c) 2016-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "convolution_kernel_ref.h"
#include "kernel_selector_utils.h"

namespace kernel_selector {

ParamsKey ConvolutionKernel_Ref::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableDifferentTypes();
    k.EnableDifferentInputWeightsTypes();

    k.EnableInputWeightsType(WeightsType::F16);
    k.EnableInputWeightsType(WeightsType::F32);
    k.EnableInputWeightsType(WeightsType::INT8);
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableInputLayout(DataLayout::byxf);
    k.EnableOutputLayout(DataLayout::byxf);
    k.EnableInputLayout(DataLayout::yxfb);
    k.EnableOutputLayout(DataLayout::yxfb);
    k.EnableInputLayout(DataLayout::bfzyx);
    k.EnableOutputLayout(DataLayout::bfzyx);
    k.EnableInputLayout(DataLayout::bfzyx_f16);
    k.EnableOutputLayout(DataLayout::bfzyx_f16);
    k.EnableTensorOffset();
    k.EnableTensorPitches();

    k.EnableBiasPerFeature();
    k.EnableBiasPerOutput();
    k.EnableNonBiasTerm();

    k.EnableSplitSupport();
    k.EnableDepthwiseSeparableOpt();
    k.EnableInt8Quantization();
    k.EnableOutputCalibration();

    k.EnableLocalConvolution();
    k.EnableGroupedConvolution();
    return k;
}
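
// The key above deliberately advertises the widest support surface of the
// convolution kernels: the reference implementation serves as a fallback
// that the kernel selector can match when no optimized kernel fits the
// requested parameters.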

KernelsData ConvolutionKernel_Ref::GetKernelsData(const Params& params, const optional_params& options) const {
    return GetTunedKernelsDataByIndex(params, options);
}
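
// Note: the reference kernel has no dedicated auto-tuning; presumably the
// call above simply forwards to the base class' common path with the
// default tune index, building the kernel data from SetDefault() and
// GetJitConstants() below.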

JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
    JitConstants jit = ConvolutionKernelBase::GetJitConstants(params, kd);

    // Create an ACTIVATION macro accepting a type parameter - we don't have
    // a single UNIT_TYPE for the whole kernel.
    //
    // TODO: This gives both ACTIVATION and ACTIVATION_TYPED. Should we
    // factor that out into a virtual function to avoid the creation of
    // similar yet distinct macros?
    jit.Merge(MakeActivationJitConstants(params.activations, "_CONV_TYPED", true));
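
    // On the kernel side this presumably gets used along the lines of
    // (an illustrative sketch only - not necessarily the exact macro
    // signature emitted by MakeActivationJitConstants):
    //
    //   acc = ACTIVATION_CONV_TYPED(float, acc, NL_M, NL_N);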

    // Needs to be done on the host to get the _MAX_VAL/_MIN_VAL/TO_TYPE
    // macros available (they will be used in the activation).
    //
    // TODO: Should this be done for all the kernels? It might even be done
    // directly in the OpenCL include, as opposed to jitting. On the other
    // hand, going through jit ensures we are in sync with the
    // MakeTypeJitConstants implementation.
    jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
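
    // Assumption from the flag name and the guard below: when a quantized
    // convolution comes with an FP32 bias, the bias is presumably already in
    // the real (dequantized) scale, so the kernel is told to skip
    // dequantizing it.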
    if (params.int8_quantization && !params.bias.empty() && params.bias[0].GetDType() == Datatype::F32)
        jit.AddConstant(MakeJitConstant("DONT_DEQUANTIZE_BIAS", "1"));

    return jit;
}

ConvolutionKernelBase::DispatchData ConvolutionKernel_Ref::SetDefault(const convolution_params& params,
                                                                      int autoTuneIndex) const {
    DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);

    // FIXME: ConvolutionKernelBase::SetDefault should probably be pure and
    // not set these at all, as they are specific to a concrete
    // implementation. Unfortunately, the convolution classes are currently
    // written in such a way that most of the logic is in the base class'
    // method, guarded by "if" conditions (based on the layout!).
    //
    // Just set the correct values for this particular implementation here,
    // until the whole hierarchy is re-written.
    const auto& out = params.output;
    std::vector<size_t> global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
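    // I.e. one work-item per output element: X in the first dimension,
    // Y * Z in the second, Feature * Batch in the third.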

    auto local = GetOptimalLocalWorkGroupSizes(global);

    kd.gws0 = global[0];
    kd.gws1 = global[1];
    kd.gws2 = global[2];
    kd.lws0 = local[0];
    kd.lws1 = local[1];
    kd.lws2 = local[2];

    return kd;
}

bool ConvolutionKernel_Ref::Validate(const Params& params, const optional_params& options) const {
    if (!ConvolutionKernelBase::Validate(params, options))
        return false;

    const auto& conv_params = static_cast<const convolution_params&>(params);
    auto input_type = conv_params.inputs[0].GetDType();
    auto output_type = conv_params.output.GetDType();

    // The only u8 input supported is the quantized one, which would require
    // the additional checks below.
    if (input_type == output_type && input_type != Datatype::UINT8)
        return true;

    // Otherwise, only i8/u8 -> i8/u8/fp32 convolution with i8 weights, i32
    // biases and a quantization term is supported for now.
    if ((input_type != Datatype::INT8 && input_type != Datatype::UINT8) ||
        (output_type != Datatype::INT8 && output_type != Datatype::UINT8 && output_type != Datatype::F32))
        return false;

    if (!conv_params.int8_quantization)
        return false;

    if (conv_params.output_calibration)
        // Probably everything is in place to support this case; we just need
        // to add a test.
        return false;

    if (conv_params.weights.GetDType() != WeightsType::INT8)
        return false;

    if (!conv_params.bias.empty() && conv_params.bias.front().GetDType() != Datatype::INT32)
        // A non-quantized (FP32) bias is probably OK too, but that needs to
        // be verified.
        return false;

    return true;
}
}  // namespace kernel_selector