// Copyright (c) 2016-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "convolution_kernel_ref.h"
#include "kernel_selector_utils.h"

namespace kernel_selector {

ParamsKey ConvolutionKernel_Ref::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableDifferentTypes();
    k.EnableDifferentInputWeightsTypes();

    k.EnableInputWeightsType(WeightsType::F16);
    k.EnableInputWeightsType(WeightsType::F32);
    k.EnableInputWeightsType(WeightsType::INT8);
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableInputLayout(DataLayout::byxf);
    k.EnableOutputLayout(DataLayout::byxf);
    k.EnableInputLayout(DataLayout::yxfb);
    k.EnableOutputLayout(DataLayout::yxfb);
    k.EnableInputLayout(DataLayout::bfzyx);
    k.EnableOutputLayout(DataLayout::bfzyx);
    k.EnableInputLayout(DataLayout::bfzyx_f16);
    k.EnableOutputLayout(DataLayout::bfzyx_f16);
    k.EnableTensorOffset();
    k.EnableTensorPitches();

    k.EnableBiasPerFeature();
    k.EnableBiasPerOutput();
    k.EnableNonBiasTerm();

    k.EnableSplitSupport();
    k.EnableDepthwiseSeparableOpt();
    k.EnableInt8Quantization();
    k.EnableOutputCalibration();

    k.EnableLocalConvolution();
    k.EnableGroupedConvolution();
    return k;
}
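
// The key above deliberately advertises the widest support surface of the
// convolution kernels: the reference implementation serves as a fallback
// that the kernel selector can match when no optimized kernel fits the
// requested parameters.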

KernelsData ConvolutionKernel_Ref::GetKernelsData(const Params& params, const optional_params& options) const {
    return GetTunedKernelsDataByIndex(params, options);
}
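
// Note: the reference kernel has no dedicated auto-tuning; presumably the
// call above simply forwards to the base class' common path with the
// default tune index, building the kernel data from SetDefault() and
// GetJitConstants() below.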

JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
    JitConstants jit = ConvolutionKernelBase::GetJitConstants(params, kd);

    // Create an ACTIVATION macro accepting a type parameter - we don't have
    // a single UNIT_TYPE for the whole kernel.
    //
    // TODO: This gives both ACTIVATION and ACTIVATION_TYPED. Should we
    // factor that out into a virtual function to avoid the creation of
    // similar yet distinct macros?
    jit.Merge(MakeActivationJitConstants(params.activations, "_CONV_TYPED", true));
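
    // On the kernel side this presumably gets used along the lines of
    // (an illustrative sketch only - not necessarily the exact macro
    // signature emitted by MakeActivationJitConstants):
    //
    //   acc = ACTIVATION_CONV_TYPED(float, acc, NL_M, NL_N);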

    // Needs to be done on the host to get the _MAX_VAL/_MIN_VAL/TO_TYPE
    // macros available (they will be used in the activation).
    //
    // TODO: Should this be done for all the kernels? It might even be done
    // directly in the OpenCL include, as opposed to jitting. On the other
    // hand, going through jit ensures we are in sync with the
    // MakeTypeJitConstants implementation.
    jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
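
    // Assumption from the flag name and the guard below: when a quantized
    // convolution comes with an FP32 bias, the bias is presumably already in
    // the real (dequantized) scale, so the kernel is told to skip
    // dequantizing it.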
    if (params.int8_quantization && !params.bias.empty() && params.bias[0].GetDType() == Datatype::F32)
        jit.AddConstant(MakeJitConstant("DONT_DEQUANTIZE_BIAS", "1"));

    return jit;
}

ConvolutionKernelBase::DispatchData ConvolutionKernel_Ref::SetDefault(const convolution_params& params,
                                                                      int autoTuneIndex) const {
    DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);

    // FIXME: ConvolutionKernelBase::SetDefault should probably be pure and
    // not set these at all, as they are specific to a concrete
    // implementation. Unfortunately, the convolution classes are currently
    // written in such a way that most of the logic is in the base class'
    // method, guarded by "if" conditions (based on the layout!).
    //
    // Just set the correct values for this particular implementation here,
    // until the whole hierarchy is re-written.
    const auto& out = params.output;
    std::vector<size_t> global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
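    // I.e. one work-item per output element: X in the first dimension,
    // Y * Z in the second, Feature * Batch in the third.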

    auto local = GetOptimalLocalWorkGroupSizes(global);

    kd.gws0 = global[0];
    kd.gws1 = global[1];
    kd.gws2 = global[2];
    kd.lws0 = local[0];
    kd.lws1 = local[1];
    kd.lws2 = local[2];

    return kd;
}

bool ConvolutionKernel_Ref::Validate(const Params& params, const optional_params& options) const {
    if (!ConvolutionKernelBase::Validate(params, options))
        return false;

    const auto& conv_params = static_cast<const convolution_params&>(params);
    auto input_type = conv_params.inputs[0].GetDType();
    auto output_type = conv_params.output.GetDType();

    // The only u8 input supported is the quantized one, which would require
    // the additional checks below.
    if (input_type == output_type && input_type != Datatype::UINT8)
        return true;

    // Otherwise, only i8/u8 -> i8/u8/fp32 convolution with i8 weights, i32
    // biases and a quantization term is supported for now.
    if ((input_type != Datatype::INT8 && input_type != Datatype::UINT8) ||
        (output_type != Datatype::INT8 && output_type != Datatype::UINT8 && output_type != Datatype::F32))
        return false;

    if (!conv_params.int8_quantization)
        return false;

    if (conv_params.output_calibration)
        // Probably everything is in place to support this case; we just need
        // to add a test.
        return false;

    if (conv_params.weights.GetDType() != WeightsType::INT8)
        return false;

    if (!conv_params.bias.empty() && conv_params.bias.front().GetDType() != Datatype::INT32)
        // A non-quantized (FP32) bias is probably OK too, but that needs to
        // be verified.
        return false;

    return true;
}
}  // namespace kernel_selector