inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp

   1 /*
   2 // Copyright (c) 2019 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "gather_kernel_ref.h"
  18 #include "kernel_selector_utils.h"
  19
  20 namespace kernel_selector
  21 {
  22     static int32_t GetGatherChannelIndex(const gather_params& params)
  23     {
  24         Tensor::DataChannelName name = Tensor::DataChannelName::X;
  25
  26         switch (params.axis)
  27         {
  28             case GatherAxis::X:
  29                 return 3;
  30             case GatherAxis::Y:
  31                 return 2;
  32             case GatherAxis::FEATURE:
  33                 return 1;
  34             case GatherAxis::BATCH:
  35                 return 0;
  36             default: break;
  37         }
  38
  39         return DataTensor::Channelndex(params.output.GetLayout(), name);
  40     }
  41
  42     ParamsKey GatherKernelRef::GetSupportedKey() const
  43     {
  44         ParamsKey k;
  45         k.EnableInputDataType(Datatype::F16);
  46         k.EnableInputDataType(Datatype::F32);
  47         k.EnableOutputDataType(Datatype::F16);
  48         k.EnableOutputDataType(Datatype::F32);
  49         k.EnableAllInputLayout();
  50         k.EnableAllOutputLayout();
  51         k.EnableTensorOffset();
  52         k.EnableTensorPitches();
  53         k.EnableBatching();
  54         k.EnableDifferentTypes();
  55         k.EnableLookUpTableIndicesFormat(Datatype::F32);
  56         return k;
  57     }
  58
  59     static size_t getPartSize(const gather_params& params, int32_t axis)
  60     {
  61         size_t partSize = 1;
  62         for (size_t i = params.inputs[0].Dimentions() - axis; i > 0; --i)
  63             partSize *= params.inputs[0].GetDims()[i-1].v;
  64         return partSize;
  65     }
  66
  67     static size_t getNumberOfParts(const gather_params& params, size_t partSize)
  68     {
  69         return params.inputs[0].LogicalSize() / partSize;
  70     }
  71
  72     static size_t getSliceSize(const gather_params& params, int32_t axis)
  73     {
  74         size_t numberOfItemsInSlice = 1;
  75         for (size_t i = params.inputs[0].Dimentions() - axis - 1; i > 0; --i)
  76             numberOfItemsInSlice *= params.inputs[0].GetDims()[i-1].v;
  77         return numberOfItemsInSlice;
  78     }
  79
  80     CommonDispatchData GatherKernelRef::SetDefault(const gather_params& params, const optional_params&) const
  81     {
  82         CommonDispatchData runInfo;
  83
  84         const int32_t axis = GetGatherChannelIndex(params);
  85
  86         const size_t numberOfParts = params.inputs[0].LogicalSize() / getPartSize(params, axis);
  87
  88         size_t gws = numberOfParts * params.inputs[1].LogicalSize();
  89
  90         const size_t vectorSize = 16;
  91
  92         runInfo.gws0 = Align(gws, vectorSize);
  93         runInfo.gws1 = 1;
  94         runInfo.gws2 = 1;
  95
  96         runInfo.lws0 = vectorSize;
  97         runInfo.lws1 = 1;
  98         runInfo.lws2 = 1;
  99
 100         runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
 101
 102         return runInfo;
 103     }
 104
 105     JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const
 106     {
 107         JitConstants jit = MakeBaseParamsJitConstants(params);
 108
 109         int32_t axis = GetGatherChannelIndex(params);
 110         size_t partSize = getPartSize(params, axis);
 111         size_t sliceSize = getSliceSize(params, axis);
 112         size_t numberOfParts = getNumberOfParts(params, partSize);
 113         size_t numberOfIndexes = params.inputs[1].LogicalSize();
 114
 115         jit.AddConstant(MakeJitConstant("AXIS", axis));
 116         jit.AddConstant(MakeJitConstant("PART_SIZE", partSize));
 117         jit.AddConstant(MakeJitConstant("SLICE_SIZE", sliceSize));
 118         jit.AddConstant(MakeJitConstant("PARTS_NUMBER", numberOfParts));
 119         jit.AddConstant(MakeJitConstant("COMPUTATIONAL_OPERATIONS_NUMBER", numberOfParts * numberOfIndexes));
 120
 121         return jit;
 122     }
 123
 124     KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
 125     {
 126         KernelData kd = KernelData::Default<gather_params>(params);
 127         gather_params& newParams = *static_cast<gather_params*>(kd.params.get());
 128
 129         assert(params.GetType() == KernelType::GATHER);
 130
 131         auto runInfo = SetDefault(newParams, options);
 132         auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
 133         auto cldnn_jit = GetJitConstants(newParams);
 134         std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
 135
 136         auto& kernel = kd.kernels[0];
 137
 138         FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
 139
 140         kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
 141
 142         return{ kd };
 143     }
 144 }