Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / convolution / convolution_kernel_bfyx_3x3_dw_opt.cpp
1 /*
2 // Copyright (c) 2017-2018 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "convolution_kernel_bfyx_3x3_dw_opt.h"
18  
19 namespace kernel_selector 
20 {
21     ConvolutionKernel_bfyx_3x3_dw_opt::ConvolutionKernel_bfyx_3x3_dw_opt() : ConvolutionKernelBase("convolution_gpu_bfyx_3x3_dw_opt")
22     {
23         // Generate the dispatch options to the auto-tuner.
24         std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14 };
25         std::vector<size_t> tileYDimSizes = { 1,2,3,4,5,6,7 };
26         std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
27
28         for (auto tileXDim : tileXDimSizes)
29         {
30             for (auto tileYDim : tileYDimSizes)
31             {
32                 for (auto executionMode : executionModes)
33                 {
34                     autoTuneOptions.emplace_back(AutoTuneOption{ {tileXDim, tileYDim}, executionMode });
35                 }
36             }
37         }
38     }
39
40     ParamsKey ConvolutionKernel_bfyx_3x3_dw_opt::GetSupportedKey() const
41     {
42         ParamsKey k;
43         k.EnableInputDataType(Datatype::F32);
44         k.EnableInputDataType(Datatype::F16);
45         k.EnableInputWeightsType(WeightsType::F16);
46         k.EnableInputWeightsType(WeightsType::F32);
47         k.EnableOutputDataType(Datatype::F32);
48         k.EnableOutputDataType(Datatype::F16);
49         k.EnableInputLayout(DataLayout::bfyx);
50         k.EnableOutputLayout(DataLayout::bfyx);
51         k.EnableTensorOffset();
52         k.EnableTensorPitches();
53         k.EnableBiasPerFeature();
54         k.EnableNonBiasTerm();
55         k.EnableBatching();
56         k.EnableSplitSupport();
57         k.EnableSubGroup();
58         k.EnableSubGroupShort();
59         k.EnableDepthwiseSeparableOpt();
60         return k;
61     }
62
63     bool ConvolutionKernel_bfyx_3x3_dw_opt::Validate(const Params& p, const optional_params& o) const
64     {
65         if (!ConvolutionKernelBase::Validate(p, o) ||
66             !CovolutionCheckInput(p, o))
67         {
68             return false;
69         }
70
71         const convolution_params& cp = static_cast<const convolution_params&>(p);
72
73         if ((cp.filterSize.x != 3) ||
74             (cp.filterSize.y != 3) ||
75             (cp.stride.x != 1) ||
76             (cp.stride.y != 1) ||
77             (cp.padding.x != 1) ||
78             (cp.padding.y != 1) ||
79             (cp.inputs[0].Feature().v != cp.split) ||
80             cp.output.PitchesDifferFromLogicalDims())
81         {
82             return false;
83         }
84
85         return true;
86     }
87
88     ConvolutionKernel_bfyx_3x3_dw_opt::AutoTuneOption ConvolutionKernel_bfyx_3x3_dw_opt::GetAutoTuneOptions(const Params&, int autoTuneIndex) const
89     {
90         if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
91         {
92             return autoTuneOptions[autoTuneIndex];
93         }
94
95         constexpr int simdSize = 16;
96
97         return AutoTuneOption{ { simdSize - 2, 7 }, DEFAULT };
98     }
99
100     ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_3x3_dw_opt::SetDefault(const convolution_params& params, int autoTuneIndex) const
101     {
102         constexpr int simdSize = 16;
103
104         DispatchData runInfo = Parent::SetDefault(params);
105
106         auto options = GetAutoTuneOptions(params, autoTuneIndex);
107
108         const int numTilesX = static_cast<int>(std::ceil(static_cast<float>(params.inputs[0].X().v) / static_cast<float>(options.tileDims.x)));
109         const int numTilesY = static_cast<int>(std::ceil(static_cast<float>(params.inputs[0].Y().v) / static_cast<float>(options.tileDims.y)));
110
111         runInfo.cldnnStyle.blockWidth = options.tileDims.x;
112         runInfo.cldnnStyle.blockHeight = options.tileDims.y;
113         runInfo.gws0 = numTilesX * simdSize;
114         runInfo.gws1 = numTilesY;
115         runInfo.gws2 = params.inputs[0].Feature().v * params.inputs[0].Batch().v;
116         runInfo.lws0 = simdSize;
117         runInfo.lws1 = 1;
118         runInfo.lws2 = 1;
119
120         runInfo.effiency = FORCE_PRIORITY_5;
121
122         return runInfo;
123     }
124
125     JitConstants ConvolutionKernel_bfyx_3x3_dw_opt::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
126     {
127         stSize tileDims = { kd.cldnnStyle.blockWidth, kd.cldnnStyle.blockHeight };
128         auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd);
129
130         if (tileDims.y != 0 && tileDims.x != 0)
131         {
132             mem_consts.AddConstant(MakeJitConstant("UNIT_BYTE_SIZE", kd.fp16UnitUsed ? sizeof(short) : sizeof(float)));
133             mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", kd.lws0));
134             mem_consts.AddConstant(MakeJitConstant("TILE_HEIGHT", tileDims.y));
135             mem_consts.AddConstant(MakeJitConstant("TILE_WIDTH", tileDims.x));
136         }
137
138         return mem_consts;
139     }
140
141     KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
142     {
143         constexpr int simdSize = 16;
144
145         KernelData kd = KernelData::Default<convolution_params>(params);
146         convolution_params& convParams = *static_cast<convolution_params*>(kd.params.get());
147         DispatchData runInfo = SetDefault(convParams, autoTuneIndex);
148
149         if (static_cast<int>(static_cast<int>(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize > convParams.inputs[0].Y().pitch)
150         {
151             // Internal Error - requested tile size is not supported for y pitch
152             return{};
153         }
154
155         return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex);
156     }
157
158     KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetKernelsData(const Params& params, const optional_params& options) const
159     {
160         return GetTunedKernelsDataByIndex(params, options, -1);
161     }
162
163     KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
164     {
165         if (!Validate(params, options))
166         {
167             return{};
168         }
169
170         KernelsData res = {};
171
172         for (size_t i = 0; i < autoTuneOptions.size(); i++)
173         {
174             KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
175             if (!kd.empty())
176             {
177                 res.emplace_back(kd[0]);
178             }
179         }
180
181         KernelsData defaultKds = GetKernelsData(params, options);
182         res.insert(res.end(), defaultKds.begin(), defaultKds.end());
183
184         return res;
185     }
186 }