Publishing 2019 R3 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / convolution / convolution_kernel_bfyx_f16_depthwise.cpp
1 // Copyright (c) 2018-2019 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15
16 #include <iostream>
17 #include "convolution_kernel_bfyx_f16_depthwise.h"
18 #include "kernel_selector_utils.h"
19 #include <string>
20
21 namespace kernel_selector {
// Exported to the kernel as SUB_GROUP_SIZE and used as the local work size in dimension 1.
static constexpr size_t sub_group_size = 16;
// Exported to the kernel as IC_BLOCK; the feature dimension of the global work size is
// aligned to this value, and OUTPUT_LEFTOVERS is defined when the output feature count
// is not a multiple of it.
static constexpr size_t feature_block_size = 16;
24
25 ParamsKey ConvolutionKernel_bfyx_f16_depthwise::GetSupportedKey() const {
26     ParamsKey k;
27     k.EnableInputDataType(Datatype::F16);
28     k.EnableInputWeightsType(WeightsType::F16);
29     k.EnableOutputDataType(Datatype::F16);
30     k.EnableInputDataType(Datatype::F32);
31     k.EnableInputWeightsType(WeightsType::F32);
32     k.EnableOutputDataType(Datatype::F32);
33     k.EnableInputLayout(DataLayout::bfyx_f16);
34     k.EnableOutputLayout(DataLayout::bfyx_f16);
35     k.EnableTensorOffset();
36     k.EnableTensorPitches();
37     k.EnableBiasPerFeature();
38     k.EnableNonBiasTerm();
39     k.EnableBatching();
40     k.EnableGroupedConvolution();
41     k.EnableSubGroup();
42     k.EnableSubGroupShort();
43     k.EnableDepthwiseSeparableOpt();
44     return k;
45 }
46
47 bool ConvolutionKernel_bfyx_f16_depthwise::Validate(const Params& p, const optional_params&) const {
48     const convolution_params& cp = static_cast<const convolution_params&>(p);
49     if (!cp.depthwise_separable_opt || (cp.inputs[0].Feature().v != cp.split && cp.inputs[0].Feature().v != cp.groups))
50         return false;
51
52     if (cp.filterSize.x != 3 || cp.filterSize.y != 3)
53         return false;
54
55     if (cp.stride.x != 1 && cp.stride.x != 2)
56         return false;
57
58     return true;
59 }
60
61 ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_f16_depthwise::SetDefault(const convolution_params& params,
62                                                                                      int) const {
63     DispatchData runInfo = Parent::SetDefault(params);
64     const auto& out = params.output;
65
66     runInfo.gws0 = CeilDiv(out.X().v, 8) * out.Y().v;
67     runInfo.gws1 = Align(out.Feature().v, feature_block_size);
68     runInfo.gws2 = out.Batch().v;
69     runInfo.lws0 = 1;
70     runInfo.lws1 = sub_group_size;
71     runInfo.lws2 = 1;
72
73     if (out.Batch().v == 1)
74         runInfo.effiency = FORCE_PRIORITY_1;
75     else
76         runInfo.effiency = FORCE_PRIORITY_7;
77
78     return runInfo;
79 }
80
81 JitConstants ConvolutionKernel_bfyx_f16_depthwise::GetJitConstants(const convolution_params& params,
82                                                                    const DispatchData& kd) const {
83     auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
84
85     const auto block_width = 8;
86
87     if (params.fused_ops.size() > 0) {
88         FusedOpsConfiguration conf_vec = {"_VEC", {"b", "(f_block*16)", "y", "x"}, "dst", block_width, true, false, true, false };
89         FusedOpsConfiguration conf_scalar = {"_SCALAR", {"b", "(f_block*16)", "y", "(x+i)"}, "dst[i]", 1, true, false, true, false };
90         jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec, conf_scalar}));
91         jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
92         jit.Merge(MakeTypeJitConstants(Datatype::F16, "half"));
93     }
94
95     jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
96     jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.output.X().v, block_width)));
97     jit.AddConstant(MakeJitConstant("IC_BLOCK", feature_block_size));
98     if (params.output.Feature().v % feature_block_size != 0) {
99         jit.AddConstant(MakeJitConstant("OUTPUT_LEFTOVERS", 1));
100     }
101
102     return jit;
103 }
104
105 KernelsData ConvolutionKernel_bfyx_f16_depthwise::GetKernelsData(const Params& params,
106                                                                  const optional_params& options) const {
107     return GetCommonKernelsData(params, options);
108 }
109
110 }  // namespace kernel_selector