Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / actual_kernels / fused_conv_bn_scale / fused_conv_bn_scale_kernel_base.cpp
1 /*
2 // Copyright (c) 2018 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "fused_conv_bn_scale_kernel_base.h"
18 #include "kernel_selector_utils.h"
19 #include "common_tools.h"
20
21 namespace kernel_selector 
22 {
23     bool fused_conv_bn_scale_kernel_base::Validate(const Params& p, const optional_params& o) const
24     {
25         if (p.GetType() != KernelType::FUSED_CONV_BN_SCALE ||
26             o.GetType() != KernelType::FUSED_CONV_BN_SCALE)
27         {
28             return false;
29         }
30
31         const fused_conv_bn_scale_params& params = static_cast<const fused_conv_bn_scale_params&>(p);
32         const fused_conv_bn_scale_optional_params& optParams = static_cast<const fused_conv_bn_scale_optional_params&>(o);
33
34         bool bSupportedWeightsLayout = false;
35
36         for (WeightsLayout l : GetSupportedWeightLayouts(params))
37         {
38             bSupportedWeightsLayout |= params.weights.GetLayout() == l;
39         }
40
41         const bool bWeightsOK = bSupportedWeightsLayout || optParams.allowStaticInputReordering;
42
43         return bWeightsOK;
44     }
45
46     JitConstants fused_conv_bn_scale_kernel_base::GetJitConstants(const fused_conv_bn_scale_params& params, const DispatchData&) const
47     {
48         JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
49         const auto& padding = params.padding;
50         const auto& input = params.inputs[0];
51
52         int64_t input_offset_with_padding = (int64_t)input.GetFirstElementOffset() - padding.x*input.X().pitch - input.Y().pitch*padding.y;
53         input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
54
55         mem_consts.AddConstants({
56             MakeJitConstant("STRIDE",                       params.stride),
57             MakeJitConstant("PADDING",                      params.padding),
58             MakeJitConstant("FILTER_ARRAY_NUM",             params.split),
59             MakeJitConstant("DILATION",                     params.dilation),
60             MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
61             MakeJitConstant("EPSILON", params.epsilon)
62         });
63
64         if (params.fused_in_training)
65             mem_consts.AddConstant(MakeJitConstant("FUSED_TRAINING", 1));
66         if (params.scale_bias)
67             mem_consts.AddConstant(MakeJitConstant("SCALE_BIAS_TERM", 1));
68
69         return mem_consts;
70     }
71
72     bool fused_conv_bn_scale_kernel_base::CheckWorkGroups(const DispatchData& kd)
73     {
74         if (kd.gws0 == 0 ||
75             kd.gws1 == 0 ||
76             kd.gws2 == 0 ||
77             kd.lws0 == 0 ||
78             kd.lws1 == 0 ||
79             kd.lws2 == 0)
80         {
81             return false;
82         }
83
84         if ((kd.gws0 % kd.lws0) != 0 ||
85             (kd.gws1 % kd.lws1) != 0 ||
86             (kd.gws2 % kd.lws2) != 0)
87         {
88             return false;
89         }
90
91         return true;
92     }
93
94     fused_conv_bn_scale_kernel_base::DispatchData fused_conv_bn_scale_kernel_base::SetDefault(const fused_conv_bn_scale_params& params) const
95     {
96         DispatchData kd;
97
98         const auto& out = params.output;
99         kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
100         std::vector<size_t> global;
101         if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf)
102         {
103             global = { out.X().v, out.Y().v, out.Feature().v*out.Batch().v };
104         }
105         else
106         {
107             global = { out.Feature().v*out.Batch().v, out.X().v, out.Y().v };
108         }
109
110         auto local = GetOptimalLocalWorkGroupSizes(global);
111
112         kd.gws0 = global[0];
113         kd.gws1 = global[1];
114         kd.gws2 = global[2];
115
116         kd.lws0 = local[0];
117         kd.lws1 = local[1];
118         kd.lws2 = local[2];
119
120         kd.effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
121         return kd;
122     }
123
    // Builds the KernelsData for the fused convolution + batch-norm + scale
    // primitive: validates the request, computes dispatch sizes, reorders the
    // weights when needed, generates the JIT constants, and appends the extra
    // kernel arguments this fused kernel consumes. Returns an empty list on
    // any failure. The argument-push order below is a contract with the
    // generated OpenCL kernel's parameter list - do not reorder.
    KernelsData fused_conv_bn_scale_kernel_base::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
    {
        if (!Validate(params, options))
        {
            return{};
        }

        KernelData kd = KernelData::Default<fused_conv_bn_scale_params>(params);
        fused_conv_bn_scale_params& newParams = *static_cast<fused_conv_bn_scale_params*>(kd.params.get());

        DispatchData runInfo = SetDefault(newParams);

        if (!CheckWorkGroups(runInfo))
        {
            // Internal Error - wrong calculation of global/local work group sizes
            return{};
        }

        // Attach a weights-reorder step (if the layout requires one and the
        // options allow it); without valid weights the kernel cannot run.
        bool succeed = UpdateWeightsParams(
            newParams,
            options,
            GetSupportedWeightLayouts(newParams),
            kd.weightsReorderParams);

        if (!succeed)
        {
            return{};
        }

        auto finalKernelName = GetKernelName(newParams);
        auto cldnnJit = GetJitConstants(newParams, runInfo);
        auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
        auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);

        auto& kernel = kd.kernels[0];
        // Standard conv arguments: weights enabled, bias only when present.
        FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, "", true, !newParams.bias.empty(), 1);
        kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
        // Extra INPUT buffers start at index 1: one unconditional input, one
        // more when scale_bias is set, and three more when fused_in_training
        // is set (presumably batch-norm/scale auxiliary tensors - TODO confirm
        // against the OpenCL kernel source).
        uint32_t idx = 1;
        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
        if (newParams.scale_bias)
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
        if (newParams.fused_in_training)
        {
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx });
        }

        kd.estimatedTime = estimated_time;

        return{ kd };
    }
176 }