2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "convolution_kernel_tutorial.h"
19 namespace kernel_selector {
// take a look at convolution_kernel_tutorial.h for the class declaration and tutorial notes
25 ParamsKey ConvolutionKernel_Tutorial::GetSupportedKey() const
28 // - Update the features supported by the kernel below
32 // Supported data type
33 k.EnableInputDataType(Datatype::F16);
34 k.EnableInputDataType(Datatype::F32);
35 k.EnableOutputDataType(Datatype::F16);
36 k.EnableOutputDataType(Datatype::F32);
37 k.EnableInputWeightsType(WeightsType::F16);
38 k.EnableInputWeightsType(WeightsType::F32);
41 k.EnableInputLayout(DataLayout::bfyx);
42 k.EnableOutputLayout(DataLayout::bfyx);
43 k.EnableInputLayout(DataLayout::yxfb);
44 k.EnableOutputLayout(DataLayout::yxfb);
46 // Supported tensor offset/pitch/padding
47 k.EnableTensorOffset();
48 k.EnableTensorPitches();
51 // Supported convolution extra data
53 k.EnableBiasPerFeature();
54 k.EnableBiasPerOutput();
55 k.EnableNonBiasTerm();
57 // Supported convolution which get a split index and uses it as a view on the input/output
58 k.EnableSplitSupport();
60 // Supported convoltuion with depth separable optimization flag
61 k.EnableDepthwiseSeparableOpt();
68 KernelsData ConvolutionKernel_Tutorial::GetKernelsData(const Params& /*params*/, const optional_params& /*options*/) const
73 // - Uncomment and update the following lines
75 // assert(params.GetType() == KernelType::CONVOLUTION && options.GetType() == KernelType::CONVOLUTION);
77 // const uint32_t numOfkernels = 1;
78 // KernelData kd = KernelData::Default<ConvolutionParams>(params, numOfkernels);
79 // ConvolutionParams& newParams = *static_cast<ConvolutionParams*>(kd.params.get());
80 // const ConvolutionOptionalParams& optParams = static_cast<const ConvolutionOptionalParams&>(options);
81 // auto& kernel = kd.kernels[0];
85 // - make sure that the input weights tensor fit to this kernel needs.
86 // in case it's not and the flag "optParams.allowWeightsReorder" set to "true", please update
87 // the member "kd.weightsReorderParams" with the right OpenCL/CPU kernel which will be used to reorder the
88 // weights in the loading time.
89 // you have three options:
90 // - provide a cpu code - inherit from "CPUKernel" and implement "Execute" function.
91 // (by default the input layout of CPU kernel is simple bfyx, and clDNN will reorder it for you before calling to Execute function)
92 // - provide a GPU code by filling clKernelData.
93 // - use existing layouts which clDNN support and use the auxiliary function "UpdateWeightsParams"
97 // - make sure that the input tensor fits to this kernel's needs.
98 // make sure that you have the proper padding area with a proper padding value, and a proper alignment.
99 // currently Convolution in clDNN doesn't allow the kernel to ask reordering
103 // - fill "kernel.kernelString"
104 // - fill "kernel.kernelString->str" - the source of the kernel.
105 // please use "db.get(kernelName)" in case you use "*.cl" file which located under "kernel_selector\core\cl_kernels\".
106 // - fill "kernel.kernelString->jit" - Dynamic jit of this params.
107 // - fill "kernel.kernelString->options" - options which pass to cl program build functions (like "-cl-no-subgroup-ifp")
108 // - fill "kernel.kernelString->entry_point" - kernel entry point
109 // - fill "kernel.kernelString->batch_compilation" - A flag that allow clDNN kernel to compile this kernel as a part of a program
110 // NOTE: this can only be used if you prevent symbol conflicts with other kernels (#undef is done automatically by clDNN)
114 // - fill "kernel.WorkGroupSizes" - local/global work group sizes for OpenCL kernel
118 // - fill "kernel.arguments" - which describe the argument of the kernel.
119 // in this tutorial you can use:
120 // kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); // "0" mean index of the input in case of multiple inputs.
121 // kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
122 // kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 });
123 // kernel.arguments.push_back({ ArgumentDescriptor::Types::BIAS, 0 });
125 // in case that you have more than one kernel, you probably need an intermediate buffers.
126 // in order to support that you have to describe the buffer size in kd.internalBufferSizes and add a kernel argument like:
127 // kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, <index to kd.internalBufferSize> });
131 // - estimate the kernel's execution time. currently it's under development so please use FORCE_PRIORITY_<X> - lower is better.
139 ConvolutionKernel_Tutorial::Parent::DispatchData ConvolutionKernel_Tutorial::SetDefault(const convolution_params& params, int autoTuneIndex) const
141 DispatchData runInfo = Parent::SetDefault(params, autoTuneIndex);
145 // Init runInfo, and set kernel efficiency
146 runInfo.effiency = TUTORIAL_PRIORITY;
151 bool ConvolutionKernel_Tutorial::Validate(const Params& p, const optional_params& o) const
153 if (!Parent::Validate(p, o))
160 // Validate this kernel support params and optional params. use:
161 // const ConvolutionParams& params = static_cast<const ConvolutionParams&>(p);
162 // const ConvolutionOptionalParams& options = static_cast<const ConvolutionOptionalParams&>(o);
167 JitConstants ConvolutionKernel_Tutorial::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
169 auto jit = Parent::GetJitConstants(params, kd);
170 jit.AddConstant(MakeJitConstant("ADVANCED_TUTORIAL", ""));
174 // Add you own jit constants. for example
175 // jit.AddConstant(MakeJitConstant("<MY_CONST>", <my val>));
176 // - "my val" can be most of KernelSelector/C++ common types
181 KernelsData ConvolutionKernel_Tutorial::GetKernelsData(const Params& params, const optional_params& options) const
183 return GetTunedKernelsDataByIndex(params, options);