inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.cpp

   1 // Copyright (c) 2016-2020 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "softmax_kernel_items_class_optimized.h"
  16 #include "kernel_selector_utils.h"
  17
  18 namespace kernel_selector {
  19 // how many workitems we use to calculate item classes for one output, only 16 supported right now
  20 static const auto workitems_per_classes = 16;
  21
  22 ParamsKey SoftmaxKerneItemsClassOptimized::GetSupportedKey() const { return GetDefaultSupportedKey(); }
  23
  24 SoftmaxKerneItemsClassOptimized::Parent::DispatchData SoftmaxKerneItemsClassOptimized::SetDefault(
  25     const softmax_params& params,
  26     const optional_params& optParams) const {
  27     auto dispatchData = Parent::SetDefault(params, optParams);
  28
  29     auto& input = params.inputs[0];
  30
  31     size_t item_class_count = 0;
  32     const auto global = GetSoftmaxDimGlobalSizes(params.dim, params.output);
  33
  34     assert(global.size() == 3);
  35
  36     switch (params.dim) {
  37         case SoftmaxDim::X:
  38             item_class_count = input.X().v;
  39             break;
  40         case SoftmaxDim::Y:
  41             item_class_count = input.Y().v;
  42             break;
  43         case SoftmaxDim::Z:
  44             item_class_count = input.Z().v;
  45             break;
  46         case SoftmaxDim::FEATURE:
  47             item_class_count = input.Feature().v;
  48             break;
  49         default:
  50             break;
  51     }
  52
  53     dispatchData.gws[0] = global[0];
  54     dispatchData.gws[1] = global[1] * workitems_per_classes;  // we multiply it by workitems_per_classes because we split computations of
  55                                                          // one "full item classes output" into multiple workitems by "full item
  56                                                          // classes output" i mean N outputs where N is number of item classes.
  57     dispatchData.gws[2] = global[2];
  58
  59     dispatchData.lws = { 1, static_cast<size_t>(workitems_per_classes), 1 };
  60
  61     dispatchData.leftovers = item_class_count % workitems_per_classes;
  62
  63     if (item_class_count >= 32) {
  64         dispatchData.efficiency = FORCE_PRIORITY_7;
  65     } else {
  66         dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
  67     }
  68
  69     return dispatchData;
  70 }
  71
  72 JitConstants SoftmaxKerneItemsClassOptimized::GetJitConstants(const softmax_params& params, DispatchData dispatchData) const {
  73     auto jit = SoftmaxItemsClassKernelBase::GetJitConstants(params, dispatchData);
  74
  75     jit.AddConstant(MakeJitConstant("WORKITEMS_PER_CLASSES", workitems_per_classes));
  76     jit.AddConstant(MakeJitConstant("HAS_DRIVER_PROBLEMS", params.engineInfo.bIMADSupport));
  77
  78     return jit;
  79 }
  80 KernelsData SoftmaxKerneItemsClassOptimized::GetKernelsData(const Params& params,
  81                                                             const optional_params& options) const {
  82     return GetCommonKernelsData(params, options);
  83 }
  84 }  // namespace kernel_selector