Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / fused_conv_bn_scale / fused_conv_bn_scale_kernel_base.cpp
1 /*
2 // Copyright (c) 2018 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "fused_conv_bn_scale_kernel_base.h"
18 #include "kernel_selector_utils.h"
19 #include "common_tools.h"
20
21 namespace kernel_selector 
22 {
23     bool fused_conv_bn_scale_kernel_base::Validate(const Params& p, const optional_params& o) const
24     {
25         if (p.GetType() != KernelType::FUSED_CONV_BN_SCALE ||
26             o.GetType() != KernelType::FUSED_CONV_BN_SCALE)
27         {
28             return false;
29         }
30
31         const fused_conv_bn_scale_params& params = static_cast<const fused_conv_bn_scale_params&>(p);
32         const fused_conv_bn_scale_optional_params& optParams = static_cast<const fused_conv_bn_scale_optional_params&>(o);
33
34         bool bSupportedWeightsLayout = false;
35
36         for (WeightsLayout l : GetSupportedWeightLayouts(params))
37         {
38             bSupportedWeightsLayout |= params.weights.GetLayout() == l;
39         }
40
41         const bool bWeightsOK = bSupportedWeightsLayout || optParams.allowStaticInputReordering;
42
43         return bWeightsOK;
44     }
45
46     JitConstants fused_conv_bn_scale_kernel_base::GetJitConstants(const fused_conv_bn_scale_params& params, const DispatchData&) const
47     {
48         JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
49         const auto& padding = params.padding;
50         const auto& input = params.inputs[0];
51
52         int64_t input_offset_with_padding = (int64_t)input.GetFirstElementOffset() - padding.x*input.X().pitch - input.Y().pitch*padding.y;
53         input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
54
55         mem_consts.AddConstants({
56             MakeJitConstant("STRIDE",                       params.stride),
57             MakeJitConstant("PADDING",                      params.padding),
58             MakeJitConstant("FILTER_ARRAY_NUM",             params.split),
59             MakeJitConstant("DILATION",                     params.dilation),
60             MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
61             MakeJitConstant("EPSILON", params.epsilon)
62         });
63
64         if (params.fused_in_training)
65             mem_consts.AddConstant(MakeJitConstant("FUSED_TRAINING", 1));
66         if (params.scale_bias)
67             mem_consts.AddConstant(MakeJitConstant("SCALE_BIAS_TERM", 1));
68
69         return mem_consts;
70     }
71
72     bool fused_conv_bn_scale_kernel_base::CheckWorkGroups(const DispatchData& kd)
73     {
74         if (kd.gws0 == 0 ||
75             kd.gws1 == 0 ||
76             kd.gws2 == 0 ||
77             kd.lws0 == 0 ||
78             kd.lws1 == 0 ||
79             kd.lws2 == 0)
80         {
81             return false;
82         }
83
84         if ((kd.gws0 % kd.lws0) != 0 ||
85             (kd.gws1 % kd.lws1) != 0 ||
86             (kd.gws2 % kd.lws2) != 0)
87         {
88             return false;
89         }
90
91         return true;
92     }
93
94     fused_conv_bn_scale_kernel_base::DispatchData fused_conv_bn_scale_kernel_base::SetDefault(const fused_conv_bn_scale_params& params) const
95     {
96         DispatchData kd;
97
98         const auto& out = params.output;
99         kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
100         std::vector<size_t> global;
101         if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf)
102         {
103             global = { out.X().v, out.Y().v, out.Feature().v*out.Batch().v };
104         }
105         else
106         {
107             global = { out.Feature().v*out.Batch().v, out.X().v, out.Y().v };
108         }
109
110         auto local = GetOptimalLocalWorkGroupSizes(global);
111
112         kd.gws0 = global[0];
113         kd.gws1 = global[1];
114         kd.gws2 = global[2];
115
116         kd.lws0 = local[0];
117         kd.lws1 = local[1];
118         kd.lws2 = local[2];
119
120         kd.effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
121         return kd;
122     }
123
    // Builds the KernelsData for the fused convolution + batch-norm + scale
    // primitive: validates the request, computes dispatch sizes, reorders the
    // weights when needed, generates the JIT constants, and appends the extra
    // kernel arguments this fused kernel consumes. Returns an empty list on
    // any failure. The argument-push order below is a contract with the
    // generated OpenCL kernel's parameter list - do not reorder.
    KernelsData fused_conv_bn_scale_kernel_base::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
    {
        if (!Validate(params, options))
        {
            return{};
        }

        KernelData kd = KernelData::Default<fused_conv_bn_scale_params>(params);
        fused_conv_bn_scale_params& newParams = *static_cast<fused_conv_bn_scale_params*>(kd.params.get());

        DispatchData runInfo = SetDefault(newParams);

        if (!CheckWorkGroups(runInfo))
        {
            // Internal Error - wrong calculation of global/local work group sizes
            return{};
        }

        // Attach a weights-reorder step (if the layout requires one and the
        // options allow it); without valid weights the kernel cannot run.
        bool succeed = UpdateWeightsParams(
            newParams,
            options,
            GetSupportedWeightLayouts(newParams),
            kd.weightsReorderParams);

        if (!succeed)
        {
            return{};
        }

        auto finalKernelName = GetKernelName(newParams);
        auto cldnnJit = GetJitConstants(newParams, runInfo);
        auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
        auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);

        auto& kernel = kd.kernels[0];
        // Standard conv arguments: weights enabled, bias only when present.
        FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, "", true, !newParams.bias.empty(), 1);
        kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
        // Extra INPUT buffers start at index 1: one unconditional input, one
        // more when scale_bias is set, and three more when fused_in_training
        // is set (presumably batch-norm/scale auxiliary tensors - TODO confirm
        // against the OpenCL kernel source).
        uint32_t idx = 1;
        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
        if (newParams.scale_bias)
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
        if (newParams.fused_in_training)
        {
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx });
        }

        kd.estimatedTime = estimated_time;

        return{ kd };
    }
176 }