inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp

   1 /*
   2 // Copyright (c) 2017-2018 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "convolution_kernel_bfyx_3x3_dw_opt.h"
  18
  19 namespace kernel_selector
  20 {
  21     ConvolutionKernel_bfyx_3x3_dw_opt::ConvolutionKernel_bfyx_3x3_dw_opt() : ConvolutionKernelBase("convolution_gpu_bfyx_3x3_dw_opt")
  22     {
  23         // Generate the dispatch options to the auto-tuner.
  24         std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14 };
  25         std::vector<size_t> tileYDimSizes = { 1,2,3,4,5,6,7 };
  26         std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
  27
  28         for (auto tileXDim : tileXDimSizes)
  29         {
  30             for (auto tileYDim : tileYDimSizes)
  31             {
  32                 for (auto executionMode : executionModes)
  33                 {
  34                     autoTuneOptions.emplace_back(AutoTuneOption{ {tileXDim, tileYDim}, executionMode });
  35                 }
  36             }
  37         }
  38     }
  39
  40     ParamsKey ConvolutionKernel_bfyx_3x3_dw_opt::GetSupportedKey() const
  41     {
  42         ParamsKey k;
  43         k.EnableInputDataType(Datatype::F32);
  44         k.EnableInputDataType(Datatype::F16);
  45         k.EnableInputWeightsType(WeightsType::F16);
  46         k.EnableInputWeightsType(WeightsType::F32);
  47         k.EnableOutputDataType(Datatype::F32);
  48         k.EnableOutputDataType(Datatype::F16);
  49         k.EnableInputLayout(DataLayout::bfyx);
  50         k.EnableOutputLayout(DataLayout::bfyx);
  51         k.EnableTensorOffset();
  52         k.EnableTensorPitches();
  53         k.EnableBiasPerFeature();
  54         k.EnableNonBiasTerm();
  55         k.EnableBatching();
  56         k.EnableSplitSupport();
  57         k.EnableSubGroup();
  58         k.EnableSubGroupShort();
  59         k.EnableDepthwiseSeparableOpt();
  60         return k;
  61     }
  62
  63     bool ConvolutionKernel_bfyx_3x3_dw_opt::Validate(const Params& p, const optional_params& o) const
  64     {
  65         if (!ConvolutionKernelBase::Validate(p, o) ||
  66             !CovolutionCheckInput(p, o))
  67         {
  68             return false;
  69         }
  70
  71         const convolution_params& cp = static_cast<const convolution_params&>(p);
  72
  73         if ((cp.filterSize.x != 3) ||
  74             (cp.filterSize.y != 3) ||
  75             (cp.stride.x != 1) ||
  76             (cp.stride.y != 1) ||
  77             (cp.padding.x != 1) ||
  78             (cp.padding.y != 1) ||
  79             (cp.inputs[0].Feature().v != cp.split) ||
  80             cp.output.PitchesDifferFromLogicalDims())
  81         {
  82             return false;
  83         }
  84
  85         return true;
  86     }
  87
  88     ConvolutionKernel_bfyx_3x3_dw_opt::AutoTuneOption ConvolutionKernel_bfyx_3x3_dw_opt::GetAutoTuneOptions(const Params&, int autoTuneIndex) const
  89     {
  90         if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
  91         {
  92             return autoTuneOptions[autoTuneIndex];
  93         }
  94
  95         constexpr int simdSize = 16;
  96
  97         return AutoTuneOption{ { simdSize - 2, 7 }, DEFAULT };
  98     }
  99
 100     ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_3x3_dw_opt::SetDefault(const convolution_params& params, int autoTuneIndex) const
 101     {
 102         constexpr int simdSize = 16;
 103
 104         DispatchData runInfo = Parent::SetDefault(params);
 105
 106         auto options = GetAutoTuneOptions(params, autoTuneIndex);
 107
 108         const int numTilesX = static_cast<int>(std::ceil(static_cast<float>(params.inputs[0].X().v) / static_cast<float>(options.tileDims.x)));
 109         const int numTilesY = static_cast<int>(std::ceil(static_cast<float>(params.inputs[0].Y().v) / static_cast<float>(options.tileDims.y)));
 110
 111         runInfo.cldnnStyle.blockWidth = options.tileDims.x;
 112         runInfo.cldnnStyle.blockHeight = options.tileDims.y;
 113         runInfo.gws0 = numTilesX * simdSize;
 114         runInfo.gws1 = numTilesY;
 115         runInfo.gws2 = params.inputs[0].Feature().v * params.inputs[0].Batch().v;
 116         runInfo.lws0 = simdSize;
 117         runInfo.lws1 = 1;
 118         runInfo.lws2 = 1;
 119
 120         runInfo.effiency = FORCE_PRIORITY_5;
 121
 122         return runInfo;
 123     }
 124
 125     JitConstants ConvolutionKernel_bfyx_3x3_dw_opt::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
 126     {
 127         stSize tileDims = { kd.cldnnStyle.blockWidth, kd.cldnnStyle.blockHeight };
 128         auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd);
 129
 130         if (tileDims.y != 0 && tileDims.x != 0)
 131         {
 132             mem_consts.AddConstant(MakeJitConstant("UNIT_BYTE_SIZE", kd.fp16UnitUsed ? sizeof(short) : sizeof(float)));
 133             mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", kd.lws0));
 134             mem_consts.AddConstant(MakeJitConstant("TILE_HEIGHT", tileDims.y));
 135             mem_consts.AddConstant(MakeJitConstant("TILE_WIDTH", tileDims.x));
 136         }
 137
 138         return mem_consts;
 139     }
 140
 141     KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
 142     {
 143         constexpr int simdSize = 16;
 144
 145         KernelData kd = KernelData::Default<convolution_params>(params);
 146         convolution_params& convParams = *static_cast<convolution_params*>(kd.params.get());
 147         DispatchData runInfo = SetDefault(convParams, autoTuneIndex);
 148
 149         if (static_cast<int>(static_cast<int>(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize > convParams.inputs[0].Y().pitch)
 150         {
 151             // Internal Error - requested tile size is not supported for y pitch
 152             return{};
 153         }
 154
 155         return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex);
 156     }
 157
 158     KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetKernelsData(const Params& params, const optional_params& options) const
 159     {
 160         return GetTunedKernelsDataByIndex(params, options, -1);
 161     }
 162
 163     KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
 164     {
 165         if (!Validate(params, options))
 166         {
 167             return{};
 168         }
 169
 170         KernelsData res = {};
 171
 172         for (size_t i = 0; i < autoTuneOptions.size(); i++)
 173         {
 174             KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
 175             if (!kd.empty())
 176             {
 177                 res.emplace_back(kd[0]);
 178             }
 179         }
 180
 181         KernelsData defaultKds = GetKernelsData(params, options);
 182         res.insert(res.end(), defaultKds.begin(), defaultKds.end());
 183
 184         return res;
 185     }
 186 }