inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16_depthwise.cpp

   1 // Copyright (c) 2018-2019 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16 #include <iostream>
  17 #include "convolution_kernel_bfyx_f16_depthwise.h"
  18 #include "kernel_selector_utils.h"
  19 #include <string>
  20
  21 namespace kernel_selector {
  // Used in this file as the local work size along dimension 1 and emitted
  // to the kernel as the SUB_GROUP_SIZE JIT constant.
  static constexpr size_t sub_group_size = 16;
  // Used in this file to align the global work size along dimension 1 to a
  // whole number of feature blocks, emitted as the IC_BLOCK JIT constant, and
  // used to detect an output feature count that is not a multiple of it.
  static constexpr size_t feature_block_size = 16;
  24
  25 ParamsKey ConvolutionKernel_bfyx_f16_depthwise::GetSupportedKey() const {
  26     ParamsKey k;
  27     k.EnableInputDataType(Datatype::F16);
  28     k.EnableInputWeightsType(WeightsType::F16);
  29     k.EnableOutputDataType(Datatype::F16);
  30     k.EnableInputDataType(Datatype::F32);
  31     k.EnableInputWeightsType(WeightsType::F32);
  32     k.EnableOutputDataType(Datatype::F32);
  33     k.EnableInputLayout(DataLayout::bfyx_f16);
  34     k.EnableOutputLayout(DataLayout::bfyx_f16);
  35     k.EnableTensorOffset();
  36     k.EnableTensorPitches();
  37     k.EnableBiasPerFeature();
  38     k.EnableNonBiasTerm();
  39     k.EnableBatching();
  40     k.EnableGroupedConvolution();
  41     k.EnableSubGroup();
  42     k.EnableSubGroupShort();
  43     k.EnableDepthwiseSeparableOpt();
  44     return k;
  45 }
  46
  47 bool ConvolutionKernel_bfyx_f16_depthwise::Validate(const Params& p, const optional_params&) const {
  48     const convolution_params& cp = static_cast<const convolution_params&>(p);
  49     if (!cp.depthwise_separable_opt || (cp.inputs[0].Feature().v != cp.split && cp.inputs[0].Feature().v != cp.groups))
  50         return false;
  51
  52     if (cp.filterSize.x != 3 || cp.filterSize.y != 3)
  53         return false;
  54
  55     if (cp.stride.x != 1 && cp.stride.x != 2)
  56         return false;
  57
  58     return true;
  59 }
  60
  61 ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_f16_depthwise::SetDefault(const convolution_params& params,
  62                                                                                      int) const {
  63     DispatchData runInfo = Parent::SetDefault(params);
  64     const auto& out = params.output;
  65
  66     runInfo.gws0 = CeilDiv(out.X().v, 8) * out.Y().v;
  67     runInfo.gws1 = Align(out.Feature().v, feature_block_size);
  68     runInfo.gws2 = out.Batch().v;
  69     runInfo.lws0 = 1;
  70     runInfo.lws1 = sub_group_size;
  71     runInfo.lws2 = 1;
  72
  73     if (out.Batch().v == 1)
  74         runInfo.effiency = FORCE_PRIORITY_1;
  75     else
  76         runInfo.effiency = FORCE_PRIORITY_7;
  77
  78     return runInfo;
  79 }
  80
  81 JitConstants ConvolutionKernel_bfyx_f16_depthwise::GetJitConstants(const convolution_params& params,
  82                                                                    const DispatchData& kd) const {
  83     auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
  84
  85     const auto block_width = 8;
  86
  87     if (params.fused_ops.size() > 0) {
  88         FusedOpsConfiguration conf_vec = {"_VEC", {"b", "(f_block*16)", "y", "x"}, "dst", block_width, true, false, true, false };
  89         FusedOpsConfiguration conf_scalar = {"_SCALAR", {"b", "(f_block*16)", "y", "(x+i)"}, "dst[i]", 1, true, false, true, false };
  90         jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec, conf_scalar}));
  91         jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
  92         jit.Merge(MakeTypeJitConstants(Datatype::F16, "half"));
  93     }
  94
  95     jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
  96     jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.output.X().v, block_width)));
  97     jit.AddConstant(MakeJitConstant("IC_BLOCK", feature_block_size));
  98     if (params.output.Feature().v % feature_block_size != 0) {
  99         jit.AddConstant(MakeJitConstant("OUTPUT_LEFTOVERS", 1));
 100     }
 101
 102     return jit;
 103 }
 104
 105 KernelsData ConvolutionKernel_bfyx_f16_depthwise::GetKernelsData(const Params& params,
 106                                                                  const optional_params& options) const {
 107     return GetCommonKernelsData(params, options);
 108 }
 109
 110 }  // namespace kernel_selector