inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp

   1 /*
   2 // Copyright (c) 2018 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "convolution_kernel_winograd_6x3_s1_fused.h"
  18 #include "kernel_selector_utils.h"
  19
  20 namespace kernel_selector {
  21
  22     ParamsKey ConvolutionKernel_Winograd_6x3_s1_fused::GetSupportedKey() const
  23     {
  24         ParamsKey k;
  25         k.EnableInputDataType(Datatype::F16);
  26         k.EnableOutputDataType(Datatype::F16);
  27         k.EnableInputWeightsType(WeightsType::F16);
  28         k.EnableInputWeightsType(WeightsType::F32);
  29         k.EnableInputLayout(DataLayout::byxf);
  30         k.EnableOutputLayout(DataLayout::byxf);
  31         k.EnableTensorOffset();
  32         k.EnableTensorPitches();
  33         k.EnableBatching();
  34         k.EnableBiasPerFeature();
  35         k.EnableBiasPerOutput();
  36         k.EnableNonBiasTerm();
  37
  38         return k;
  39     }
  40
  41     JitConstants ConvolutionKernel_Winograd_6x3_s1_fused::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
  42     {
  43         JitConstants jit = Parent::GetJitConstants(params, runInfo);
  44
  45         const auto idepth = params.inputs[0].Feature().v;
  46         const auto input_pad_y = params.inputs[0].Y().pad.before + params.inputs[0].Y().pad.after;
  47         const auto input_pad_x = params.inputs[0].X().pad.before + params.inputs[0].X().pad.after;
  48         const auto rows = params.inputs[0].Y().v + input_pad_y;
  49         const auto cols = params.inputs[0].X().v + input_pad_x;
  50
  51         auto output_pad_x_before = params.output.GetDims()[0].pad.before;
  52         auto output_pad_y_before = params.output.GetDims()[1].pad.before;
  53         auto output_pad_x_after = params.output.GetDims()[0].pad.after;
  54         auto output_pad_y_after = params.output.GetDims()[1].pad.after;
  55         auto C4_up16 = ((uint32_t)((idepth + 15) / 16) * 16) / 4;
  56
  57                 //if there's input padding then input offset should be ignored
  58                 const auto inoffset_x = (input_pad_x) ? 0 : params.padding.x;
  59                 const auto inoffset_y = (input_pad_y) ? 0 : params.padding.y;
  60
  61         jit.AddConstants({
  62             MakeJitConstant("H", rows),
  63             MakeJitConstant("W", cols),
  64             MakeJitConstant("P", rows - 3 + 1 + output_pad_y_before + output_pad_y_after + 2 * inoffset_y),
  65             MakeJitConstant("Q", cols - 3 + 1 + output_pad_x_before + output_pad_x_after + 2 * inoffset_x),
  66             MakeJitConstant("R", 3),
  67             MakeJitConstant("S", 3),
  68             MakeJitConstant("N", 1),
  69             MakeJitConstant("px", inoffset_x),
  70             MakeJitConstant("py", inoffset_y),
  71             MakeJitConstant("sx", 1),
  72             MakeJitConstant("sy", 1),
  73             MakeJitConstant("C_", idepth),
  74
  75             MakeJitConstant("C4_up16", C4_up16),
  76             MakeJitConstant("TROWS", rows),
  77             MakeJitConstant("TCOLS", 8),
  78             MakeJitConstant("KROWSW", 3),
  79             MakeJitConstant("KCOLSW", 8),
  80         });
  81
  82         return jit;
  83     }
  84
  85     std::vector<WeightsLayout> ConvolutionKernel_Winograd_6x3_s1_fused::GetSupportedWeightLayouts(const convolution_params& params) const
  86     {
  87         //check if image weights layout will fit into device memory, if not then try to fallback to buffer
  88         if (CheckImageSize(params, WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb))
  89         {
  90             return{ WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb };
  91         }
  92         else
  93         {
  94             return{ WeightsLayout::winograd_6x3_s1_fused_weights };
  95         }
  96     }
  97
  98     ConvolutionKernel_Winograd_6x3_s1_fused::Parent::DispatchData ConvolutionKernel_Winograd_6x3_s1_fused::SetDefault(const convolution_params& arg, int) const
  99     {
 100         Parent::DispatchData runInfo = Parent::SetDefault(arg);
 101
 102         const auto odepth = arg.output.Feature().v;
 103         const auto input_pad_y = arg.inputs[0].Y().pad.before + arg.inputs[0].Y().pad.after;
 104         const auto input_pad_x = arg.inputs[0].X().pad.before + arg.inputs[0].X().pad.after;
 105         const auto rows = arg.inputs[0].Y().v + input_pad_y;
 106         const auto cols = arg.inputs[0].X().v + input_pad_x;
 107
 108                 //if there's input padding then input offset should be ignored
 109                 const auto inoffset_x = (input_pad_x) ? 0 : arg.padding.x;
 110                 const auto inoffset_y = (input_pad_y) ? 0 : arg.padding.y;
 111
 112         auto P = rows - 2 + 2 * inoffset_y;
 113         auto Q = cols - 2 + 2 * inoffset_x;
 114         auto K = odepth;
 115         auto N = 1;
 116
 117         uint32_t global_step[3] = { 14, 6, 16 * 8 };
 118         uint32_t local_size[3] = { 16, 1, 8 };
 119
 120         runInfo.gws0 = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0];
 121         runInfo.gws1 = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1];
 122         runInfo.gws2 = ((uint32_t)((N*K * 8 + global_step[2] - 1)) / global_step[2]) * local_size[2];
 123
 124         runInfo.lws0 = local_size[0];
 125         runInfo.lws1 = local_size[1];
 126         runInfo.lws2 = local_size[2];
 127
 128         runInfo.effiency = FORCE_PRIORITY_1;
 129
 130         return runInfo;
 131     }
 132
 133     bool ConvolutionKernel_Winograd_6x3_s1_fused::Validate(const Params& p, const optional_params& o) const
 134     {
 135         if (!Parent::Validate(p, o))
 136         {
 137             return false;
 138         }
 139
 140         const convolution_params& params = static_cast<const convolution_params&>(p);
 141
 142         if ((params.weights.X().v != 3) || (params.weights.Y().v != 3) ||
 143             (params.stride.x != 1) ||
 144             (params.stride.y != 1) ||
 145             (params.filterSize.x != 3) ||
 146             (params.filterSize.y != 3) ||
 147             (params.output.Feature().v % 32) ||
 148             (params.inputs[0].Feature().v % 32) ||
 149             (params.output.Feature().pad.before != 0) || (params.output.Feature().pad.after != 0) ||
 150             (params.output.Batch().pad.before != 0) || (params.output.Batch().pad.after != 0) ||
 151             //TODO: add support to batch > 1
 152             (params.inputs[0].Batch().v != 1))
 153         {
 154             return{};
 155         }
 156
 157         return true;
 158     }
 159
 160     KernelsData ConvolutionKernel_Winograd_6x3_s1_fused::GetKernelsData(const Params& params, const optional_params& options) const
 161     {
 162         return GetTunedKernelsDataByIndex(params, options);
 163     }
 164 }