ActivationKernelBase::DispatchData ActivationKernelBase::SetDefault(const activation_params& arg) const {
    const auto& out = arg.output;
    DispatchData dispatchData;

    // Choose global/local work sizes based on the output layout.
    if (out.GetLayout() == DataLayout::yxfb) {
        dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
        dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo);
    } else if (out.GetLayout() == DataLayout::b_fs_yx_fsv16) {
        // Feature dimension is blocked by 16 in this layout: align it up and
        // dispatch a fixed 16-wide work-group along that axis.
        dispatchData.gws = {Align(out.Feature().v, 16) * out.Batch().v, out.X().v, out.Y().v};
        dispatchData.lws = {16, 1, 1};
    } else {
        dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
        dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo);
    }

    dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;

    return dispatchData;
}
JitConstants ActivationKernelBase::GetJitConstants(const activation_params& params, DispatchData) const {
activation_params& newParams = *static_cast<activation_params*>(kd.params.get());
const std::string kernel_id = GetEntryPoint(kernelName, params.layerID, options);
- auto runInfo = SetDefault(newParams);
- auto cldnn_jit = GetJitConstants(newParams, runInfo);
+ auto dispatchData = SetDefault(newParams);
+ auto cldnn_jit = GetJitConstants(newParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
if (!newParams.inputActivationParams.empty()) {
kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});
}
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
return {kd};
}
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- virtual JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const activation_params& arg) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
};
}
ActivationKernelOpt::Parent::DispatchData ActivationKernelOpt::SetDefault(const activation_params& params) const {
    auto dispatchData = Parent::SetDefault(params);

    // One work item handles NUM_COLS_WI consecutive elements of the flattened
    // input, hence the 1D global size derived from the total element count.
    const auto totalSize = params.inputs[0].LogicalSize();
    dispatchData.gws = { totalSize / NUM_COLS_WI, 1, 1 };
    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);

    dispatchData.efficiency = FORCE_PRIORITY_6;

    return dispatchData;
}
bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData kd) const {
- auto jit = ActivationKernelBase::GetJitConstants(params, kd);
+JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData dispatchData) const {
+ auto jit = ActivationKernelBase::GetJitConstants(params, dispatchData);
auto input_dt = params.inputs[0].GetDType();
jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
static const int NUM_COLS_WI = 4;
DispatchData SetDefault(const activation_params& arg) const override;
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {FusedOpType::QUANTIZE,
FusedOpType::SCALE,
return k;
}
-JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData kd) const {
- auto jit = ActivationKernelBase::GetJitConstants(params, kd);
+JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData dispatchData) const {
+ auto jit = ActivationKernelBase::GetJitConstants(params, dispatchData);
auto input_dt = params.inputs[0].GetDType();
if (!params.fused_ops.empty()) {
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
- JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {FusedOpType::QUANTIZE,
FusedOpType::SCALE,
}
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
- DispatchData runInfo;
- runInfo.fp16UnitUsed = orgParams.inputs[0].GetDType() == Datatype::F16;
-
size_t sort_size = orgParams.argMaxMinSortType == ArgMaxMinSortType::VALUE ? getSortSize(orgParams) : 1;
- std::vector<size_t> local, global;
- global = { Align(getOperationNumber(orgParams), 32), sort_size, 1 };
- local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
+ DispatchData dispatchData;
+ dispatchData.gws = { Align(getOperationNumber(orgParams), 32), sort_size, 1 };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
KernelData kd = KernelData::Default<arg_max_min_params>(params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
if (orgParams.outputs_num == 2) {
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelBase::SetDefault(const arg_max_min_params& params) const {
    DispatchData dispatchData;

    // Fixed 128-wide dispatch in dimension 0 (one full work-group),
    // one slice per input batch in dimension 1.
    dispatchData.gws = { 128, params.inputs[0].Batch().v, 1 };
    dispatchData.lws = { 128, 1, 1 };

    return dispatchData;
}
KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<arg_max_min_params>(params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
- int topK = orgParams.topK;
- long size = (long)(orgParams.inputs[0].X().v * orgParams.inputs[0].Y().v * orgParams.inputs[0].Feature().v) / 8;
- long outSize = size / 16 * topK;
+ size_t topK = orgParams.topK;
+ size_t size = (size_t)(orgParams.inputs[0].X().v * orgParams.inputs[0].Y().v * orgParams.inputs[0].Feature().v) / 8;
+ size_t outSize = size / 16 * topK;
int kernelAmount = 1;
- for (; outSize > 128; outSize = (long)((outSize / 128 + 1) * topK)) {
+ for (; outSize > 128; outSize = (size_t)((outSize / 128 + 1) * topK)) {
kernelAmount++;
}
KernelData kd = KernelData::Default<arg_max_min_params>(params, kernelAmount);
newParams.inputs[0] = input;
auto& kernel = kd.kernels[i];
- DispatchData runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
- runInfo.fp16UnitUsed = orgParams.inputs[0].GetDType() == Datatype::F16;
+ dispatchData.gws = { Align(size, 16), orgParams.inputs[0].Batch().v, 1 };
+ dispatchData.lws = { 16, 1, 1 };
- runInfo.gws0 = Align(size, 16);
- runInfo.gws1 = orgParams.inputs[0].Batch().v; // B
- runInfo.gws2 = 1;
-
- runInfo.lws0 = 16;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
-
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint);
size = (size / 128 + 1) * topK;
}
/*
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
const average_unpooling_params& params) const {
const auto& input = params.inputs[0];
- DispatchData kd;
+ DispatchData dispatchData;
if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) {
// Determine global work sizes.
- kd.gws2 = input.Batch().v * input.Feature().v; // B, F
- kd.gws0 = Align(input.X().v, 32); // X
- kd.gws1 = input.Y().v; // Y
+ dispatchData.gws = { Align(input.X().v, 32), // X
+ input.Y().v, // Y
+ input.Batch().v * input.Feature().v, // B, F
+ };
- kd.lws0 = 32;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws = { 32, 1, 1 };
} else {
// Determine global work sizes.
- kd.gws0 = input.Batch().v * input.Feature().v; // B, F
- kd.gws1 = input.X().v; // X
- kd.gws2 = input.Y().v; // Y
-
- kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (kd.gws0 % kd.lws0 != 0) {
- --kd.lws0;
+ dispatchData.gws = { input.Batch().v * input.Feature().v, // B, F
+ input.X().v, // X
+ input.Y().v }; // Y
+
+ dispatchData.lws = {1, 1, 1};
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- kd.lws1 = 1;
- kd.lws2 = 1;
}
- return kd;
+ return dispatchData;
}
KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
const average_unpooling_params& orgParams = static_cast<const average_unpooling_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<average_unpooling_params>(params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
return {kd};
}
} // namespace kernel_selector
CommonDispatchData BatchToSpaceKernelBase::SetDefault(const batch_to_space_params& params, const optional_params&) const {
    const auto& out = params.output;
    CommonDispatchData dispatchData;

    if (out.GetLayout() == DataLayout::b_fs_yx_fsv16 && out.Feature().v % 16 == 0) {
        // Feature-blocked layout with a whole number of 16-wide blocks:
        // use a fixed 16-wide work-group on the feature axis.
        dispatchData.gws = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v };
        dispatchData.lws = { 1, 16, 1 };
    } else {
        // Generic path: collapse all spatial dimensions into gws[2] and let
        // the helper pick local sizes.
        dispatchData.gws = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v };
        dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
    }

    return dispatchData;
}
JitConstants BatchToSpaceKernelBase::GetJitConstants(const batch_to_space_params& params) const {
return {};
}
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
"", false, false, 1, GetFusedPrimitiveInputsCount(params));
kd.estimatedTime = estimatedTime;
return k;
}
-BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1::SetDefault(
- const binary_convolution_params& params,
- int) const {
- DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params);
+BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1::SetDefault(const binary_convolution_params& params, int) const {
+ DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = Align(x * y, sub_group_size);
- kd.gws1 = CeilDiv(f, 2 * sub_group_size); // 1 WI calcs 32 OC
- kd.gws2 = b;
+ dispatchData.gws[0] = Align(x * y, sub_group_size);
+ dispatchData.gws[1] = CeilDiv(f, 2 * sub_group_size); // 1 WI calcs 32 OC
+ dispatchData.gws[2] = b;
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return kd;
+ return dispatchData;
}
bool BinaryConvolutionKernel1x1::Validate(const Params& p, const optional_params& o) const {
}
JitConstants BinaryConvolutionKernel1x1::GetJitConstants(const binary_convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size)));
return WeightsLayout::os_is_yx_osv32_isv32p;
}
JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
- const DispatchData& kd) const override;
+ const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1_b_fs_yx_fsv16::SetDefault(
const binary_convolution_params& params,
int) const {
- DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = Align(x * y, sub_group_size);
- kd.gws1 = CeilDiv(f, sub_group_size); // 1 WI calcs 16 OC
- kd.gws2 = b;
+ dispatchData.gws[0] = Align(x * y, sub_group_size);
+ dispatchData.gws[1] = CeilDiv(f, sub_group_size); // 1 WI calcs 16 OC
+ dispatchData.gws[2] = b;
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws = { sub_group_size, 1, 1 };
- kd.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return kd;
+ return dispatchData;
}
bool BinaryConvolutionKernel1x1_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetJitConstants(const binary_convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size)));
return WeightsLayout::os_is_yx_osv32_isv32p;
}
JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
- const DispatchData& kd) const override;
+ const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
}
JitConstants BinaryConvolutionKernelBase::GetJitConstants(const binary_convolution_params& params,
- const DispatchData& kd) const {
+ const DispatchData& dispatchData) const {
JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
- jit.Merge(GetFusedPrimitivesJitConstants(params, kd));
+ jit.Merge(GetFusedPrimitivesJitConstants(params, dispatchData));
jit.AddConstants({
MakeJitConstant("STRIDE", params.stride),
return {};
}
-bool BinaryConvolutionKernelBase::CheckWorkGroups(const BinaryConvolutionKernelBase::DispatchData& kd) {
- if (kd.gws0 == 0 || kd.gws1 == 0 || kd.gws2 == 0 || kd.lws0 == 0 || kd.lws1 == 0 || kd.lws2 == 0) {
+bool BinaryConvolutionKernelBase::CheckWorkGroups(const BinaryConvolutionKernelBase::DispatchData& dispatchData) {
+ if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3)
return false;
- }
- if ((kd.gws0 % kd.lws0) != 0 || (kd.gws1 % kd.lws1) != 0 || (kd.gws2 % kd.lws2) != 0) {
- return false;
+ for (size_t i = 0; i < dispatchData.gws.size(); i++) {
+ if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0)
+ return false;
+ if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0)
+ return false;
}
return true;
}
-BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefault(
- const binary_convolution_params& params,
- int) const {
- DispatchData kd;
+BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefault(const binary_convolution_params& params,
+ int) const {
+ DispatchData dispatchData;
const auto& out = params.output;
- kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
std::vector<size_t> global;
if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf) {
global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- kd.cldnnStyle.blockWidth = 1;
- kd.cldnnStyle.blockHeight = 1;
- kd.cldnnStyle.prefetch = 0;
- kd.cldnnStyle.inputBlockArraySize = 0;
- kd.cldnnStyle.inputBlockWidth = 0;
-
- kd.gemmStyle.globalWorkSizeDX = 1;
- kd.gemmStyle.globalWorkSizeDY = 1;
- kd.gemmStyle.globalWorkSizeDZ = 1;
- kd.gemmStyle.subBlockDimK = 1;
- kd.gemmStyle.subBlockDimM = 0;
- kd.gemmStyle.subBlockDimN = 0;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- return kd;
+ dispatchData.gws = global;
+ dispatchData.lws = local;
+
+ dispatchData.cldnnStyle.blockWidth = 1;
+ dispatchData.cldnnStyle.blockHeight = 1;
+ dispatchData.cldnnStyle.prefetch = 0;
+ dispatchData.cldnnStyle.inputBlockArraySize = 0;
+ dispatchData.cldnnStyle.inputBlockWidth = 0;
+
+ dispatchData.gemmStyle.globalWorkSizeDX = 1;
+ dispatchData.gemmStyle.globalWorkSizeDY = 1;
+ dispatchData.gemmStyle.globalWorkSizeDZ = 1;
+ dispatchData.gemmStyle.subBlockDimK = 1;
+ dispatchData.gemmStyle.subBlockDimM = 0;
+ dispatchData.gemmStyle.subBlockDimN = 0;
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ return dispatchData;
}
KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& params,
if (NeedPaddedInput()) {
kd.reorderInput = CovolutionBinaryUpdateInputParams(newParams);
}
- DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
+ DispatchData dispatchData = SetDefault(newParams, autoTuneIndex);
- if (!CheckWorkGroups(runInfo)) {
+ if (!CheckWorkGroups(dispatchData)) {
// Internal Error - wrong calculation of global/local work group sizes
return {};
}
}
auto finalKernelName = GetKernelName(newParams);
- auto cldnnJit = GetJitConstants(newParams, runInfo);
+ auto cldnnJit = GetJitConstants(newParams, dispatchData);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
}
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
finalKernelName,
jit,
fused_deps_total);
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
kd.autoTuneIndex = autoTuneIndex;
return {kd};
virtual std::string GetKernelName(const binary_convolution_params&) const { return kernelName; }
virtual bool NeedPaddedInput() const { return false; }
bool Validate(const Params& p, const optional_params& o) const override;
- virtual JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const;
virtual JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
- const DispatchData& kd) const;
+ const DispatchData& dispatchData) const;
virtual DispatchData SetDefault(const binary_convolution_params& params, int autoTuneIndex = -1) const;
static bool CheckWorkGroups(const DispatchData&);
KernelsData GetCommonKernelsData(const Params& params,
return k;
}
-BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelGeneric::SetDefault(
- const binary_convolution_params& params,
- int) const {
- DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params);
+BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelGeneric::SetDefault(const binary_convolution_params& params,
+ int) const {
+ DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = Align(x, sub_group_size) * y;
- kd.gws1 = CeilDiv(f, 2 * sub_group_size); // 1 WI calc 2 OC x 16 X
- kd.gws2 = b;
+ dispatchData.gws[0] = Align(x, sub_group_size) * y;
+ dispatchData.gws[1] = CeilDiv(f, 2 * sub_group_size); // 1 WI calc 2 OC x 16 X
+ dispatchData.gws[2] = b;
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
bool BinaryConvolutionKernelGeneric::Validate(const Params& p, const optional_params& o) const {
}
JitConstants BinaryConvolutionKernelGeneric::GetJitConstants(const binary_convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
auto input = params.inputs[0];
auto output = params.output;
return WeightsLayout::os_is_yx_osv32_isv32p;
}
JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
- const DispatchData& kd) const override;
+ const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
return k;
}
-BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelRef::SetDefault(
- const binary_convolution_params& params,
- int) const {
- DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params);
+BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelRef::SetDefault(const binary_convolution_params& params,
+ int) const {
+ DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto y = out.Y().v;
auto x = out.X().v;
- kd.gws0 = b;
- kd.gws1 = f;
- kd.gws2 = x * y;
+ dispatchData.gws[0] = b;
+ dispatchData.gws[1] = f;
+ dispatchData.gws[2] = x * y;
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- return kd;
+ return dispatchData;
}
JitConstants BinaryConvolutionKernelRef::GetJitConstants(const binary_convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
int pad_physical_val = params.pad_value == -1.0f ? 0x00000000 : 0xFFFFFFFF;
int leftovers_mask = (0xFFFFFFFF >> (32 - params.inputs[0].Feature().v % 32));
return WeightsLayout::os_is_yx_osv32_isv32p;
}
JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
- const DispatchData& kd) const override;
+ const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params& params) const {
    const auto& output = params.output;
    DispatchData dispatchData;

    // Pair up the 6D output dimensions into three dispatch axes:
    // (X*Z, Y*W, Batch*Feature).
    dispatchData.gws = { output.X().v * output.Z().v, output.Y().v * output.W().v, output.Batch().v * output.Feature().v };
    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);

    return dispatchData;
}
KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
const auto& prim_params =
static_cast<const border_params&>(params);
- auto run_info = SetDefault(prim_params);
+ auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<border_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
- FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
k_data.estimatedTime = estimated_time;
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
BroadcastKernelBase::DispatchData BroadcastKernelBase::SetDefault(const broadcast_params& params) {
    const auto& output = params.output;
    DispatchData dispatchData;

    // Collapse the output dimensions into three dispatch axes:
    // (X, Y*Z, Batch*Feature).
    dispatchData.gws = { output.X().v, output.Y().v * output.Z().v, output.Batch().v * output.Feature().v };
    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);

    return dispatchData;
}
KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
const auto& prim_params =
static_cast<const broadcast_params&>(params);
- auto run_info = SetDefault(prim_params);
+ auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<broadcast_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
- FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
k_data.estimatedTime = estimated_time;
return {k_data};
}
ConcatenationKernelBase::DispatchData ConcatenationKernel_b_fs_yx_fsv16::SetDefault(const concatenation_params& params) const {
    DispatchData dispatchData = ConcatenationKernelBase::SetDefault(params);
    const auto& input = params.inputs[0];

    auto tileXY = getTileXY(params);
    // With a misaligned concatenation axis each work item covers two feature
    // blocks, otherwise one.
    size_t tileF = params.misalignment == 0 ? 1 : 2;

    dispatchData.gws[0] = CeilDiv(input.X().v * input.Y().v, tileXY);
    dispatchData.gws[1] = Align(input.Feature().v, 16 * tileF) / tileF;
    dispatchData.gws[2] = input.Batch().v;

    // 16-wide work-group along the feature axis to match the fsv16 blocking.
    dispatchData.lws[0] = 1;
    dispatchData.lws[1] = 16;
    dispatchData.lws[2] = 1;

    dispatchData.efficiency = FORCE_PRIORITY_1;

    return dispatchData;
}
JitConstants ConcatenationKernel_b_fs_yx_fsv16::GetJitConstants(const concatenation_params& params) const {
}
ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const concatenation_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& dims = params.inputs[0].GetDims();
auto layout = params.inputs[0].GetLayout();
DataTensor::Channelndex(layout, Tensor::DataChannelName::X) };
// Determine global work sizes.
- kd.gws0 = idx[2] != -1 ? dims[idx[2]].v : 1; // Y
- kd.gws1 = idx[1] != -1 ? dims[idx[1]].v : 1; // F
- kd.gws2 = idx[0] != -1 ? dims[idx[0]].v : 1; // B
+ dispatchData.gws[0] = idx[2] != -1 ? dims[idx[2]].v : 1; // Y
+ dispatchData.gws[1] = idx[1] != -1 ? dims[idx[1]].v : 1; // F
+ dispatchData.gws[2] = idx[0] != -1 ? dims[idx[0]].v : 1; // B
- kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (kd.gws0 % kd.lws0 != 0) {
- --kd.lws0;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- kd.lws1 = 1;
- kd.lws2 = 1;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- return kd;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ return dispatchData;
}
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
ifm_offset += ifm;
auto& kernel = kd.kernels[i];
- DispatchData runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i });
kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
kernel.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0});
lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v;
- efficiency = std::max(efficiency, runInfo.efficiency);
+ efficiency = std::max(efficiency, dispatchData.efficiency);
}
kd.estimatedTime = efficiency;
return true;
}
-ConcatenationKernelBase::DispatchData ConcatenationKernel_depth_bfyx_no_pitch::SetDefault(
- const concatenation_params& params) const {
- DispatchData runInfo = ConcatenationKernelBase::SetDefault(params);
+ConcatenationKernelBase::DispatchData ConcatenationKernel_depth_bfyx_no_pitch::SetDefault(const concatenation_params& params) const {
+ DispatchData dispatchData = ConcatenationKernelBase::SetDefault(params);
const auto& input = params.inputs[0];
const auto batch = input.Batch().v;
- runInfo.gws0 = batch;
- runInfo.gws1 = Align(std::max((size_t)1, input.LogicalSize() / batch), 16 * 8) / 8;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = batch;
+ dispatchData.gws[1] = Align(std::max((size_t)1, input.LogicalSize() / batch), 16 * 8) / 8;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = 1;
- runInfo.lws1 = 16;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 16;
+ dispatchData.lws[2] = 1;
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
- return runInfo;
+ return dispatchData;
}
KernelsData ConcatenationKernel_depth_bfyx_no_pitch::GetKernelsData(const Params& params,
}
ConcatenationKernelBase::DispatchData ConcatenationKernel_fs_b_yx_fsv32::SetDefault(const concatenation_params& params) const {
- DispatchData runInfo = ConcatenationKernelBase::SetDefault(params);
+ DispatchData dispatchData = ConcatenationKernelBase::SetDefault(params);
const auto& input = params.inputs[0];
- runInfo.gws0 = input.X().v;
- runInfo.gws1 = input.Y().v;
- runInfo.gws2 = CeilDiv(input.Feature().v, fsv) * subGroupSize * input.Batch().v;
+ dispatchData.gws[0] = input.X().v;
+ dispatchData.gws[1] = input.Y().v;
+ dispatchData.gws[2] = CeilDiv(input.Feature().v, fsv) * subGroupSize * input.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = subGroupSize;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = subGroupSize;
- runInfo.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return runInfo;
+ return dispatchData;
}
JitConstants ConcatenationKernel_fs_b_yx_fsv32::GetJitConstants(const concatenation_params& params) const {
ifm_offset += ifm;
auto& kernel = kd.kernels[i];
- DispatchData runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, optParams);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i});
kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
kernel.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0});
lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v;
- efficiency = std::max(efficiency, runInfo.efficiency);
+ efficiency = std::max(efficiency, dispatchData.efficiency);
}
kd.estimatedTime = efficiency;
}
ConcatenationKernelBase::DispatchData ConcatenationKernel_simple_Ref::SetDefault(const concatenation_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.inputs[0];
- std::vector<size_t> global;
- global = {
- input.X().v * input.Y().v,
- input.Z().v * input.W().v,
- input.Feature().v * input.Batch().v};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { input.X().v * input.Y().v,
+ input.Z().v * input.W().v,
+ input.Feature().v * input.Batch().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0]; // X * Y
- kd.gws1 = global[1]; // Z * W
- kd.gws2 = global[2]; // F * B
+ dispatchData.efficiency = FORCE_PRIORITY_9;
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- kd.efficiency = FORCE_PRIORITY_9;
-
- return kd;
+ return dispatchData;
}
KernelsData ConcatenationKernel_simple_Ref::GetKernelsData(const Params& params, const optional_params& optParams) const {
}
ConvolutionKernel_b_fs_yx_fsv16::AutoTuneOption ConvolutionKernel_b_fs_yx_fsv16::GetAutoTuneOptions(const Params& params,
- int /*autoTuneIndex*/) const {
+ int /*autoTuneIndex*/) const {
const convolution_params& cp = static_cast<const convolution_params&>(params);
auto x = cp.output.X().v;
auto f = cp.output.Feature().v;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16::SetDefault(const convolution_params& params,
- int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params);
+ int autoTuneIndex) const {
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto autoTune = GetAutoTuneOptions(params, autoTuneIndex);
- kd.cldnnStyle.blockWidth = autoTune.blockWidth;
+ dispatchData.cldnnStyle.blockWidth = autoTune.blockWidth;
auto x = out.X().v;
auto y = out.Y().v;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = CeilDiv(x, autoTune.blockWidth) * y;
- kd.gws1 = Align(f, sub_group_size);
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x, autoTune.blockWidth) * y;
+ dispatchData.gws[1] = Align(f, sub_group_size);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = sub_group_size;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
if (b == 1)
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
else
- kd.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_b_fs_yx_fsv16::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
auto input = params.inputs[0];
auto output = params.output;
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
FusedOpsConfiguration conf_vec = { "_VEC",
}
KernelsData ConvolutionKernel_b_fs_yx_fsv16::GetTunedKernelsDataByIndex(const Params& params,
- const optional_params& options,
- const int autoTuneIndex) const {
+ const optional_params& options,
+ const int autoTuneIndex) const {
auto tuneOptions = GetAutoTuneOptions(params, autoTuneIndex);
return GetCommonKernelsData(params, options, tuneOptions.exeMode, autoTuneIndex);
}
}
KernelsData ConvolutionKernel_b_fs_yx_fsv16::GetKernelsDataForAutoTune(const Params& params,
- const optional_params& options) const {
+ const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
bool NeedPaddedInput() const override { return false; }
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
private:
struct AutoTuneOption {
}
ConvolutionKernel_b_fs_yx_fsv16_1x1::AutoTuneOption ConvolutionKernel_b_fs_yx_fsv16_1x1::GetAutoTuneOptions(const Params& params,
- int /*autoTuneIndex*/) const {
+ int /*autoTuneIndex*/) const {
const convolution_params& cp = static_cast<const convolution_params&>(params);
auto x = cp.output.X().v;
auto f = cp.output.Feature().v;
ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16_1x1::SetDefault(const convolution_params& params,
int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params);
auto autoTune = GetAutoTuneOptions(params, autoTuneIndex);
- kd.cldnnStyle.blockWidth = autoTune.blockWidth;
+ dispatchData.cldnnStyle.blockWidth = autoTune.blockWidth;
const auto& input = params.inputs[0];
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = CeilDiv(x * y, autoTune.blockWidth);
- kd.gws1 = Align(f, feature_block_size);
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x * y, autoTune.blockWidth);
+ dispatchData.gws[1] = Align(f, feature_block_size);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = sub_group_size;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
auto bBlockSizeX = x % autoTune.blockWidth == 0;
auto bBlockSizeXY = out.X().pad.Total() + out.Y().pad.Total() == 0;
auto bInputPad = input.X().pad.Total() + input.Y().pad.Total() != 0;
-
+
if (b == 1) {
if ((bBlockSizeX || bBlockSizeXY) && !bInputPad) {
- kd.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
} else {
- kd.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
}
} else {
- kd.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
}
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_b_fs_yx_fsv16_1x1::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
if (!params.fused_ops.empty()) {
auto input_dt = GetUnitType(params);
FusedOpsConfiguration conf_vec = { "_VEC",
}
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
struct AutoTuneOption {
size_t blockWidth;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16_depthwise::SetDefault(const convolution_params& params,
- int) const {
- DispatchData runInfo = Parent::SetDefault(params);
+ int) const {
+ DispatchData dispatchData = Parent::SetDefault(params);
const auto& out = params.output;
- runInfo.gws0 = CeilDiv(out.X().v, x_block_size) * out.Y().v;
- runInfo.gws1 = Align(out.Feature().v, feature_block_size);
- runInfo.gws2 = out.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = sub_group_size;
- runInfo.lws2 = 1;
+ dispatchData.gws[0] = CeilDiv(out.X().v, x_block_size) * out.Y().v;
+ dispatchData.gws[1] = Align(out.Feature().v, feature_block_size);
+ dispatchData.gws[2] = out.Batch().v;
+
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
if (out.Batch().v == 1)
- runInfo.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
else
- runInfo.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_b_fs_yx_fsv16_depthwise::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
const size_t block_width = 8;
}
KernelsData ConvolutionKernel_b_fs_yx_fsv16_depthwise::GetKernelsData(const Params& params,
- const optional_params& options) const {
+ const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
}
bool NeedPaddedInput() const override { return true; }
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
};
} // namespace kernel_selector
}
JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
- mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_SPATIAL", kd.cldnnStyle.blockWidth));
- mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_FEATURES", kd.cldnnStyle.blockHeight));
- mem_consts.AddConstant(MakeJitConstant("FEATURE_SLM_SPLIT", kd.cldnnStyle.prefetch));
+ const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
+ mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_SPATIAL", dispatchData.cldnnStyle.blockWidth));
+ mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_FEATURES", dispatchData.cldnnStyle.blockHeight));
+ mem_consts.AddConstant(MakeJitConstant("FEATURE_SLM_SPLIT", dispatchData.cldnnStyle.prefetch));
mem_consts.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
mem_consts.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::SetDefault(const convolution_params& params,
int index) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& output = params.output;
auto tune_params = GetAutoTuneParams(params, index);
size_t k_slices = tune_params.feature_slm_split;
- kd.gws0 = CeilDiv(output.X().v * output.Y().v, tune_params.out_block_spatial);
- kd.gws1 = CeilDiv(output.Feature().v, tune_params.out_block_features * simd) * simd * k_slices;
- kd.gws2 = output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(output.X().v * output.Y().v, tune_params.out_block_spatial);
+ dispatchData.gws[1] = CeilDiv(output.Feature().v, tune_params.out_block_features * simd) * simd * k_slices;
+ dispatchData.gws[2] = output.Batch().v;
- kd.lws0 = 1;
- kd.lws1 = simd * k_slices;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = simd * k_slices;
+ dispatchData.lws[2] = 1;
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+ dispatchData.cldnnStyle = {0, 0, 0, 0, 0};
+ dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0};
- kd.cldnnStyle.blockWidth = tune_params.out_block_spatial;
- kd.cldnnStyle.blockHeight = tune_params.out_block_features;
- kd.cldnnStyle.prefetch = k_slices;
+ dispatchData.cldnnStyle.blockWidth = tune_params.out_block_spatial;
+ dispatchData.cldnnStyle.blockHeight = tune_params.out_block_features;
+ dispatchData.cldnnStyle.prefetch = k_slices;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
auto in_f = params.weights.IFM().v;
auto out_f = params.weights.OFM().v;
general_is_faster |= in_f == 256 && out_f == 128 && out_x == 3 && out_y == 3 && batch == 1;
if (general_is_faster && !x_strided) {
- kd.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
}
// Better to use kernel with 4 input features in a loop
if (static_cast<float>(params.weights.IFM().v) / static_cast<float>(Align(params.weights.IFM().v, fsv)) < 0.5f)
- kd.efficiency = FORCE_PRIORITY_4;
+ dispatchData.efficiency = FORCE_PRIORITY_4;
- return kd;
+ return dispatchData;
} // SetDefault
bool Convolution_kernel_b_fs_yx_fsv16_imad_1x1::Validate(const Params& params, const optional_params& options) const {
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv4_int8::SetDefault(const convolution_params& cp, int) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
if (cp.output.X().v > 512 && cp.filterSize.x == 5 && cp.filterSize.y == 5)
- runInfo.efficiency = FORCE_PRIORITY_2;
- runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 2;
- runInfo.gws1 = cp.output.Y().v;
- runInfo.gws2 = sub_group_size;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
+ dispatchData.gws[0] = CeilDiv(cp.output.X().v, sub_group_size) / 2;
+ dispatchData.gws[1] = cp.output.Y().v;
+ dispatchData.gws[2] = sub_group_size;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_b_fs_yx_fsv4_int8::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants ConvolutionKernel_b_fs_yx_fsv4_int8::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+JitConstants ConvolutionKernel_b_fs_yx_fsv4_int8::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2]));
jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
return WeightsLayout::os_is_yx_osv16_isv4;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::DispatchData
ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::SetDefault(const convolution_params& params, int autoTuneIndex) const {
- DispatchData kd;
+ DispatchData dispatchData;
auto& out = params.output;
auto tune_params = GetAutoTuneParams(params, autoTuneIndex);
fsv = 32;
}
- std::vector<size_t> global = {
+ dispatchData.gws = {
Align(CeilDiv(out.X().v, tune_params.tile_x), tune_params.lws0),
- Align(out.Y().v, tune_params.lws1),
+ Align(out.Y().v, tune_params.lws1),
CeilDiv(out.Feature().v, fsv) * tune_params.simd * out.Batch().v
};
- std::vector<size_t> local = { tune_params.lws0, tune_params.lws1, tune_params.simd };
+ dispatchData.lws = { tune_params.lws0, tune_params.lws1, tune_params.simd };
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.gemmStyle = { 0, 0, 0, 0, 0, 0 };
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.cldnnStyle.blockWidth = tune_params.tile_x;
+ dispatchData.cldnnStyle.prefetch = tune_params.preload_input_slm;
- kd.gemmStyle = { 0, 0, 0, 0, 0, 0 };
+ dispatchData.efficiency = params.stride.x == 1 ? FORCE_PRIORITY_1 : FORCE_PRIORITY_2;
- kd.cldnnStyle.blockWidth = tune_params.tile_x;
- kd.cldnnStyle.prefetch = tune_params.preload_input_slm;
-
- kd.efficiency = params.stride.x == 1 ? FORCE_PRIORITY_1 : FORCE_PRIORITY_2;
-
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::HasPaddedInput(const convolution_params& params) const {
return needs_pad;
}
-JitConstants ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+JitConstants ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
constexpr size_t imad_width = 4;
auto filter_spatial = params.weights.X().v * params.weights.Y().v;
auto filter_blocked = filter_spatial / imad_width * imad_width;
- mem_consts.AddConstant(MakeJitConstant("LWS0", kd.lws0));
- mem_consts.AddConstant(MakeJitConstant("LWS1", kd.lws1));
- mem_consts.AddConstant(MakeJitConstant("SIMD", kd.lws2));
+ mem_consts.AddConstant(MakeJitConstant("LWS0", dispatchData.lws[0]));
+ mem_consts.AddConstant(MakeJitConstant("LWS1", dispatchData.lws[1]));
+ mem_consts.AddConstant(MakeJitConstant("SIMD", dispatchData.lws[2]));
- mem_consts.AddConstant(MakeJitConstant("TILE_X", kd.cldnnStyle.blockWidth));
+ mem_consts.AddConstant(MakeJitConstant("TILE_X", dispatchData.cldnnStyle.blockWidth));
mem_consts.AddConstant(MakeJitConstant("FILTER_BLOCKED", filter_blocked));
- mem_consts.AddConstant(MakeJitConstant("PRELOAD_INPUT_TO_SLM", kd.cldnnStyle.prefetch));
+ mem_consts.AddConstant(MakeJitConstant("PRELOAD_INPUT_TO_SLM", dispatchData.cldnnStyle.prefetch));
auto needs_boundary_check = ParamsHavePadding(params) &&
(!HasPaddedInput(params) ||
bool NeedPaddedInput() const override { return false; }
bool HasPaddedInput(const convolution_params& params) const;
bool ParamsHavePadding(const convolution_params& params) const;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
struct AutoTuneParams {
ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_zyx_fsv16::SetDefault(const convolution_params& params,
int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
const auto& out = params.output;
const auto& input = params.inputs[0];
else
break;
}
- kd.cldnnStyle.blockWidth = ow_block;
+ dispatchData.cldnnStyle.blockWidth = ow_block;
if (out.GetDType() == Datatype::F16) {
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.gws0 = (f / 2);
- kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
- kd.gws2 = b % 2 == 0 ? b / 2 : b; // unroll mb by 2
+ dispatchData.gws[0] = (f / 2);
+ dispatchData.gws[1] = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
+ dispatchData.gws[2] = b % 2 == 0 ? b / 2 : b; // unroll mb by 2
} else {
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
auto ocb = (f % 32 == 0) ? 32 : 16;
- kd.gws0 = 16;
- kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
- kd.gws2 = b * f / ocb;
+ dispatchData.gws[0] = 16;
+ dispatchData.gws[1] = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
+ dispatchData.gws[2] = b * f / ocb;
}
} else if (ver_16mb16c) {
f = (g > 1) ? f/g : Align(f, 16);
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.gws0 = f;
- kd.gws1 = x * y * z;
- kd.gws2 = (out.GetDType() == Datatype::F16) ? b / 32 : b / 16;
+ dispatchData.gws[0] = f;
+ dispatchData.gws[1] = x * y * z;
+ dispatchData.gws[2] = (out.GetDType() == Datatype::F16) ? b / 32 : b / 16;
- kd.cldnnStyle.blockWidth = 1;
+ dispatchData.cldnnStyle.blockWidth = 1;
} else {
auto oh_block = 1;
f = Align(f / g, 16);
ocb /= 2;
}
- kd.cldnnStyle.blockWidth = ow_block;
+ dispatchData.cldnnStyle.blockWidth = ow_block;
- kd.gws0 = ocb;
- kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
- kd.gws2 = b * (f / ocb) * g;
+ dispatchData.gws[0] = ocb;
+ dispatchData.gws[1] = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
+ dispatchData.gws[2] = b * (f / ocb) * g;
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
if (b == 1)
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
else
- kd.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_b_fs_zyx_fsv16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
auto input = params.inputs[0];
auto output = params.output;
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
const bool is_1stconv = input.Feature().v == 3 && input.GetLayout() == DataLayout::bfzyx;
const bool ver_16mb16c = !is_1stconv && ((output.GetDType() == Datatype::F16 && output.Batch().v % 32 == 0) ||
else
jit.AddConstant(MakeJitConstant("CASE_3D", 1));
- jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0));
- jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1));
- jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2));
+ jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0]));
+ jit.AddConstant(MakeJitConstant("LWS_1", dispatchData.lws[1]));
+ jit.AddConstant(MakeJitConstant("LWS_2", dispatchData.lws[2]));
if (is_1stconv) {
if (output.GetDType() == Datatype::F16) {
} else if (ver_16mb16c) {
jit.AddConstant(MakeJitConstant("OCB", 1));
} else {
- jit.AddConstant(MakeJitConstant("OCB", runInfo.gws0));
+ jit.AddConstant(MakeJitConstant("OCB", dispatchData.gws[0]));
}
jit.AddConstant(MakeJitConstant("SUM_SCALE", 1));
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
if (ver_16mb16c) {
jit.AddConstant(MakeJitConstant("MB_BLOCK", 16));
}
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ELTWISE,
size_t in_block_depth = 1;
bool break_external_loop = false;
-
+
for (size_t d = 1; d < 16; ++d) {
if (params.output.Z().v % d != 0)
continue;
}
float Convolution_kernel_b_fs_zyx_fsv16_imad::EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const {
- size_t slm_elements = block.output_block_width * block.output_block_height * block.output_block_depth *
+ size_t slm_elements = block.output_block_width * block.output_block_height * block.output_block_depth *
block.output_block_features * (block.feature_slm_split - 1);
size_t slm_bytes = slm_elements * BytesPerElement(GetAccumulatorType(params));
}
JitConstants Convolution_kernel_b_fs_zyx_fsv16_imad::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
auto block_params = GetBlockParams(params);
idx_order[idx_order.size() - 3] = "out_z";
}
}
-
+
if (block_params.output_block_height != 1) {
loop_axes.push_back(Tensor::DataChannelName::Y);
} else {
} // GetJitConstants
ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_zyx_fsv16_imad::SetDefault(const convolution_params& params,
- int) const {
- DispatchData kd;
+ int) const {
+ DispatchData dispatchData;
const auto& output = params.output;
const auto& weights = params.weights;
auto block_params = GetBlockParams(params);
- kd.gws0 = CeilDiv(output.X().v, block_params.output_block_width);
- kd.gws1 = CeilDiv(output.Y().v, block_params.output_block_height) * CeilDiv(output.Z().v, block_params.output_block_depth);
- kd.gws2 = output.Batch().v * CeilDiv(weights.OFM().v, block_params.output_block_features) * params.groups * simd * block_params.feature_slm_split;
+ dispatchData.gws[0] = CeilDiv(output.X().v, block_params.output_block_width);
+ dispatchData.gws[1] = CeilDiv(output.Y().v, block_params.output_block_height) * CeilDiv(output.Z().v, block_params.output_block_depth);
+ dispatchData.gws[2] = output.Batch().v * CeilDiv(weights.OFM().v, block_params.output_block_features) * params.groups * simd * block_params.feature_slm_split;
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = simd * block_params.feature_slm_split;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = simd * block_params.feature_slm_split;
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+ dispatchData.cldnnStyle = {0, 0, 0, 0, 0};
+ dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0};
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
if (static_cast<float>(params.weights.IFM().v) / static_cast<float>(Align(params.weights.IFM().v, fsv)) < 0.5f)
- kd.efficiency = FORCE_PRIORITY_4;
+ dispatchData.efficiency = FORCE_PRIORITY_4;
- return kd;
+ return dispatchData;
} // SetDefault
bool Convolution_kernel_b_fs_zyx_fsv16_imad::Validate(const Params& params, const optional_params& options) const {
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
WeightsLayout GetPreferredWeightsLayout(const convolution_params& p) const override {
size_t output_block_width;
size_t output_block_height;
size_t output_block_depth;
-
+
size_t output_block_features;
size_t input_block_width;
return true;
}
-JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
+JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
- mem_consts.Merge(GetFusedPrimitivesJitConstants(params, kd));
+ mem_consts.Merge(GetFusedPrimitivesJitConstants(params, dispatchData));
const auto& padding = params.padding;
const auto& input = params.inputs[0];
std::vector<uint32_t> unrollLoopParams{params.filterSize.x,
params.filterSize.y,
- (uint32_t)kd.gemmStyle.globalWorkSizeDX,
- (uint32_t)kd.gemmStyle.globalWorkSizeDY,
- (uint32_t)kd.gemmStyle.globalWorkSizeDZ,
- (uint32_t)kd.gemmStyle.subBlockDimM,
- (uint32_t)kd.gemmStyle.subBlockDimK,
- (uint32_t)kd.gemmStyle.subBlockDimN};
+ (uint32_t)dispatchData.gemmStyle.globalWorkSizeDX,
+ (uint32_t)dispatchData.gemmStyle.globalWorkSizeDY,
+ (uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ,
+ (uint32_t)dispatchData.gemmStyle.subBlockDimM,
+ (uint32_t)dispatchData.gemmStyle.subBlockDimK,
+ (uint32_t)dispatchData.gemmStyle.subBlockDimN};
auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end());
return mem_consts;
}
-bool ConvolutionKernelBase::CheckWorkGroups(const ConvolutionKernelBase::DispatchData& kd) {
- if (kd.gws0 == 0 || kd.gws1 == 0 || kd.gws2 == 0 || kd.lws0 == 0 || kd.lws1 == 0 || kd.lws2 == 0) {
+bool ConvolutionKernelBase::CheckWorkGroups(const ConvolutionKernelBase::DispatchData& dispatchData) {
+ if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3)
return false;
- }
- if ((kd.gws0 % kd.lws0) != 0 || (kd.gws1 % kd.lws1) != 0 || (kd.gws2 % kd.lws2) != 0) {
- return false;
+ for (size_t i = 0; i < dispatchData.gws.size(); i++) {
+ if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0)
+ return false;
+ if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0)
+ return false;
}
return true;
}
ConvolutionKernelBase::DispatchData ConvolutionKernelBase::SetDefault(const convolution_params& params, int) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& out = params.output;
- kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
- std::vector<size_t> global;
if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf) {
- global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
+ dispatchData.gws = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
} else if (params.output.GetLayout() == DataLayout::bfzyx) {
- global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
+ dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
} else {
- global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
+ dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
}
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- kd.cldnnStyle.blockWidth = 1;
- kd.cldnnStyle.blockHeight = 1;
- kd.cldnnStyle.prefetch = 0;
- kd.cldnnStyle.inputBlockArraySize = 0;
- kd.cldnnStyle.inputBlockWidth = 0;
-
- kd.gemmStyle.globalWorkSizeDX = 1;
- kd.gemmStyle.globalWorkSizeDY = 1;
- kd.gemmStyle.globalWorkSizeDZ = 1;
- kd.gemmStyle.subBlockDimK = 1;
- kd.gemmStyle.subBlockDimM = 0;
- kd.gemmStyle.subBlockDimN = 0;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- return kd;
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
+
+ dispatchData.cldnnStyle.blockWidth = 1;
+ dispatchData.cldnnStyle.blockHeight = 1;
+ dispatchData.cldnnStyle.prefetch = 0;
+ dispatchData.cldnnStyle.inputBlockArraySize = 0;
+ dispatchData.cldnnStyle.inputBlockWidth = 0;
+
+ dispatchData.gemmStyle.globalWorkSizeDX = 1;
+ dispatchData.gemmStyle.globalWorkSizeDY = 1;
+ dispatchData.gemmStyle.globalWorkSizeDZ = 1;
+ dispatchData.gemmStyle.subBlockDimK = 1;
+ dispatchData.gemmStyle.subBlockDimM = 0;
+ dispatchData.gemmStyle.subBlockDimN = 0;
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ return dispatchData;
}
KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
if (kd.reorderInput && !options.allowInputReordering)
return {};
}
- DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
+ DispatchData dispatchData = SetDefault(newParams, autoTuneIndex);
- if (!CheckWorkGroups(runInfo)) {
+ if (!CheckWorkGroups(dispatchData)) {
// Internal Error - wrong calculation of global/local work group sizes
return {};
}
auto finalKernelName = GetKernelName(newParams);
- auto cldnnJit = GetJitConstants(newParams, runInfo);
+ auto cldnnJit = GetJitConstants(newParams, dispatchData);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
finalKernelName,
jit,
}
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
kd.autoTuneIndex = autoTuneIndex;
return {kd};
virtual std::string GetKernelName(const convolution_params&) const { return kernelName; }
virtual bool NeedPaddedInput() const { return false; }
bool Validate(const Params& p, const optional_params& o) const override;
- virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const;
- virtual JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const;
+ virtual JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& dispatchData) const;
virtual DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const;
static bool CheckWorkGroups(const DispatchData&);
static bool CheckPitchForSplitOnly(const convolution_params& params);
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_1x1::SetDefault(const convolution_params& params, int) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = Align(x * y, 16) / 16;
- kd.gws1 = Align(f, 16);
- kd.gws2 = b;
+ dispatchData.gws[0] = Align(x * y, 16) / 16;
+ dispatchData.gws[1] = Align(f, 16);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = 16;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 16;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_1x1::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants ConvolutionKernel_bfyx_1x1::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+JitConstants ConvolutionKernel_bfyx_1x1::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
if (params.output.Feature().v % 16)
jit.AddConstant(MakeJitConstant("LEFTOVERS", 1));
}
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_1x1_gemm_buf::SetDefault(const convolution_params& params, int) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = Align(f, 16);
- kd.gws1 = CeilDiv(x * y, 16);
- kd.gws2 = b;
+ dispatchData.gws[0] = Align(f, 16);
+ dispatchData.gws[1] = CeilDiv(x * y, 16);
+ dispatchData.gws[2] = b;
- kd.lws0 = 16;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_1x1_gemm_buf::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants ConvolutionKernel_bfyx_1x1_gemm_buf::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+JitConstants ConvolutionKernel_bfyx_1x1_gemm_buf::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
const auto& out = params.output;
const auto& input = params.inputs[0];
}
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
ConvolutionKernelBase::DispatchData convolution_kernel_bfyx_1x1_opt::SetDefault(const convolution_params& cp,
int) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
constexpr size_t sub_group_size = 8;
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
auto block = get_out_block_size(cp);
- runInfo.gws0 = cp.output.X().v / block.out_width;
- runInfo.gws1 = cp.output.Y().v / block.out_height;
- runInfo.gws2 =
- 2 * (cp.output.Feature().v * cp.output.Batch().v) / block.out_depth; // process 8 output channels per Workitem
+ dispatchData.gws[0] = cp.output.X().v / block.out_width;
+ dispatchData.gws[1] = cp.output.Y().v / block.out_height;
+ // process 8 output channels per Workitem
+ dispatchData.gws[2] = 2 * (cp.output.Feature().v * cp.output.Batch().v) / block.out_depth;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 2 * sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 2 * sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool convolution_kernel_bfyx_1x1_opt::Validate(const Params& p, const optional_params& o) const {
}
JitConstants convolution_kernel_bfyx_1x1_opt::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
auto block = get_out_block_size(params);
jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width));
protected:
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
return true;
}
-ConvolutionKernel_bfyx_3x3_dw_opt::AutoTuneOption ConvolutionKernel_bfyx_3x3_dw_opt::GetAutoTuneOptions(
- const Params&,
- int autoTuneIndex) const {
+ConvolutionKernel_bfyx_3x3_dw_opt::AutoTuneOption ConvolutionKernel_bfyx_3x3_dw_opt::GetAutoTuneOptions(const Params&,
+ int autoTuneIndex) const {
if ((autoTuneIndex >= 0) && (autoTuneIndex < static_cast<int>(autoTuneOptions.size()))) {
return autoTuneOptions[autoTuneIndex];
}
int autoTuneIndex) const {
constexpr int simdSize = 16;
- DispatchData runInfo = Parent::SetDefault(params);
+ DispatchData dispatchData = Parent::SetDefault(params);
auto options = GetAutoTuneOptions(params, autoTuneIndex);
const int numTilesY = static_cast<int>(
std::ceil(static_cast<float>(params.inputs[0].Y().v) / static_cast<float>(options.tileDims.y)));
- runInfo.cldnnStyle.blockWidth = options.tileDims.x;
- runInfo.cldnnStyle.blockHeight = options.tileDims.y;
- runInfo.gws0 = numTilesX * simdSize;
- runInfo.gws1 = numTilesY;
- runInfo.gws2 = params.inputs[0].Feature().v * params.inputs[0].Batch().v;
- runInfo.lws0 = simdSize;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.cldnnStyle.blockWidth = options.tileDims.x;
+ dispatchData.cldnnStyle.blockHeight = options.tileDims.y;
+ dispatchData.gws[0] = numTilesX * simdSize;
+ dispatchData.gws[1] = numTilesY;
+ dispatchData.gws[2] = params.inputs[0].Feature().v * params.inputs[0].Batch().v;
+ dispatchData.lws[0] = simdSize;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- runInfo.efficiency = FORCE_PRIORITY_5;
+ dispatchData.efficiency = FORCE_PRIORITY_5;
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_bfyx_3x3_dw_opt::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- stSize tileDims = {kd.cldnnStyle.blockWidth, kd.cldnnStyle.blockHeight};
- auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ stSize tileDims = {dispatchData.cldnnStyle.blockWidth, dispatchData.cldnnStyle.blockHeight};
+ auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
if (tileDims.y != 0 && tileDims.x != 0) {
- mem_consts.AddConstant(MakeJitConstant("UNIT_BYTE_SIZE", kd.fp16UnitUsed ? sizeof(short) : sizeof(float)));
- mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", kd.lws0));
+ mem_consts.AddConstant(MakeJitConstant("UNIT_BYTE_SIZE", BytesPerElement(params.output.GetDType())));
+ mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]));
mem_consts.AddConstant(MakeJitConstant("TILE_HEIGHT", tileDims.y));
mem_consts.AddConstant(MakeJitConstant("TILE_WIDTH", tileDims.x));
}
KernelData kd = KernelData::Default<convolution_params>(params);
convolution_params& convParams = *static_cast<convolution_params*>(kd.params.get());
- DispatchData runInfo = SetDefault(convParams, autoTuneIndex);
+ DispatchData dispatchData = SetDefault(convParams, autoTuneIndex);
- if (static_cast<int>(static_cast<int>(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize >
+ if (static_cast<int>(static_cast<int>(dispatchData.gws[0] - 1) / simdSize) * dispatchData.cldnnStyle.blockWidth + simdSize >
convParams.inputs[0].Y().pitch) {
// Internal Error - requested tile size is not supported for y pitch
return {};
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::oiyx;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
struct AutoTuneOption {
AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const;
std::vector<AutoTuneOption> autoTuneOptions = {};
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
return true;
}
-ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault(
- const convolution_params& params,
- int) const {
- DispatchData runInfo = Parent::SetDefault(params);
+ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault(const convolution_params& params,
+ int) const {
+ DispatchData dispatchData = Parent::SetDefault(params);
const auto& out = params.output;
- std::vector<size_t> global = {out.X().v * out.Y().v, out.Feature().v, out.Batch().v};
+ dispatchData.gws = { Align(out.X().v * out.Y().v, 16), out.Feature().v, out.Batch().v };
+ dispatchData.lws = { 16, 1, 1 };
- runInfo.gws0 = Align(global[0], 16);
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
- runInfo.lws0 = 16;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- runInfo.efficiency = FORCE_PRIORITY_2;
-
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_bfyx_depthwise_weights_lwg::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
if (params.padding.x != 0 || params.padding.y != 0)
mem_consts.AddConstant(MakeJitConstant("BOUNDARY_CHECK", 1));
const optional_params& options) const {
return GetTunedKernelsDataByIndex(params, options);
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::goiyx;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
JitConstants ConvolutionKernel_bfyx_Direct_10_10_12::GetJitConstants(const convolution_params& cp,
- const DispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(cp, runInfo);
+ const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(cp, dispatchData);
jit.AddConstants({
- MakeJitConstant("ALIGNED_OFM", RoundUp(cp.output.Feature().v / cp.groups, runInfo.gemmStyle.subBlockDimN) * cp.groups),
- MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(cp.output.Feature().v / cp.groups, runInfo.gemmStyle.subBlockDimN)),
- MakeJitConstant("DX", runInfo.gemmStyle.globalWorkSizeDX),
- MakeJitConstant("DY", runInfo.gemmStyle.globalWorkSizeDY),
+ MakeJitConstant("ALIGNED_OFM", RoundUp(cp.output.Feature().v / cp.groups, dispatchData.gemmStyle.subBlockDimN) * cp.groups),
+ MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(cp.output.Feature().v / cp.groups, dispatchData.gemmStyle.subBlockDimN)),
+ MakeJitConstant("DX", dispatchData.gemmStyle.globalWorkSizeDX),
+ MakeJitConstant("DY", dispatchData.gemmStyle.globalWorkSizeDY),
MakeJitConstant("KERNEL_SLICE_DIV2", (cp.filterSize.x * cp.filterSize.y) / 2),
- MakeJitConstant("RIGHT_PARTIAL_TILE_K", cp.output.X().v % runInfo.gemmStyle.globalWorkSizeDX),
+ MakeJitConstant("RIGHT_PARTIAL_TILE_K", cp.output.X().v % dispatchData.gemmStyle.globalWorkSizeDX),
MakeJitConstant("INPUT_BUFFER_WIDTH_PADDED", ""), // TODO: enable non padding path again
MakeJitConstant("INPUT_BUFFER_HEIGHT_PADDED", ""),
});
return jit;
}
-ConvolutionKernel_bfyx_Direct_10_10_12::Parent::DispatchData ConvolutionKernel_bfyx_Direct_10_10_12::SetDefault(
- const convolution_params& arg,
- int) const {
- Parent::DispatchData runInfo = Parent::SetDefault(arg);
+ConvolutionKernel_bfyx_Direct_10_10_12::DispatchData ConvolutionKernel_bfyx_Direct_10_10_12::SetDefault(const convolution_params& arg,
+ int) const {
+ DispatchData dispatchData = Parent::SetDefault(arg);
constexpr uint32_t TILE_N = 16;
if (arg.filterSize.x == 5) {
- runInfo.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 4, 1};
+ dispatchData.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 4, 1};
} else {
- runInfo.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 3, 1};
+ dispatchData.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 3, 1};
}
- runInfo.gws0 = RoundUp(arg.output.X().v, runInfo.gemmStyle.globalWorkSizeDX) / runInfo.gemmStyle.globalWorkSizeDX;
- runInfo.gws1 = RoundUp(arg.output.Y().v, runInfo.gemmStyle.globalWorkSizeDY) / runInfo.gemmStyle.globalWorkSizeDY;
- runInfo.gws2 = RoundUp(arg.output.Feature().v / arg.groups, TILE_N) * arg.output.Batch().v * arg.groups;
+ dispatchData.gws[0] = RoundUp(arg.output.X().v, dispatchData.gemmStyle.globalWorkSizeDX) / dispatchData.gemmStyle.globalWorkSizeDX;
+ dispatchData.gws[1] = RoundUp(arg.output.Y().v, dispatchData.gemmStyle.globalWorkSizeDY) / dispatchData.gemmStyle.globalWorkSizeDY;
+ dispatchData.gws[2] = RoundUp(arg.output.Feature().v / arg.groups, TILE_N) * arg.output.Batch().v * arg.groups;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = TILE_N;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = TILE_N;
- runInfo.efficiency = FORCE_PRIORITY_4;
+ dispatchData.efficiency = FORCE_PRIORITY_4;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_Direct_10_10_12::Validate(const Params& p, const optional_params& o) const {
return (p.groups > 1) ? WeightsLayout::gi_yxs_os_yxsv2_osv16 : WeightsLayout::i_yxs_os_yxsv2_osv16;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
JitConstants ConvolutionKernel_bfyx_GEMMLike::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstants({
- MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(params.output.Feature().v / params.groups, runInfo.gemmStyle.subBlockDimN)),
- MakeJitConstant("DX", runInfo.gemmStyle.globalWorkSizeDX),
- MakeJitConstant("DY", runInfo.gemmStyle.globalWorkSizeDY),
+ MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(params.output.Feature().v / params.groups, dispatchData.gemmStyle.subBlockDimN)),
+ MakeJitConstant("DX", dispatchData.gemmStyle.globalWorkSizeDX),
+ MakeJitConstant("DY", dispatchData.gemmStyle.globalWorkSizeDY),
MakeJitConstant("FILTER_SIZE_X_DIV2", params.filterSize.x / 2),
MakeJitConstant("INPUT_BUFFER_WIDTH_PADDED", ""), // TODO: enable non padding path again
MakeJitConstant("INPUT_BUFFER_HEIGHT_PADDED", ""),
});
- if (CeilDiv(RoundUp(params.output.X().v * params.output.Y().v, runInfo.gemmStyle.subBlockDimM),
- runInfo.gemmStyle.globalWorkSizeDY) %
- runInfo.lws1 !=
+ if (CeilDiv(RoundUp(params.output.X().v * params.output.Y().v, dispatchData.gemmStyle.subBlockDimM),
+ dispatchData.gemmStyle.globalWorkSizeDY) %
+ dispatchData.lws[1] !=
0)
jit.AddConstant(MakeJitConstant("LEFTOVERS", 1));
ConvolutionKernel_bfyx_GEMMLike::Parent::DispatchData ConvolutionKernel_bfyx_GEMMLike::SetDefault(
const convolution_params& arg,
int autoTuneIndex) const {
- DispatchData runInfo = Parent::SetDefault(arg, autoTuneIndex);
+ DispatchData dispatchData = Parent::SetDefault(arg, autoTuneIndex);
- runInfo.lws0 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[2] = 1;
if (arg.inputs[0].GetDType() == Datatype::F16) {
- runInfo.gemmStyle = {1, arg.filterSize.x, 32, 32, 1, 1};
- runInfo.lws1 = 16;
- runInfo.efficiency = FORCE_PRIORITY_6;
+ dispatchData.gemmStyle = {1, arg.filterSize.x, 32, 32, 1, 1};
+ dispatchData.lws[1] = 16;
+ dispatchData.efficiency = FORCE_PRIORITY_6;
} else {
- runInfo.gemmStyle = {2, arg.filterSize.x, 32, 32, 2, 1};
- runInfo.lws1 = 8;
- runInfo.efficiency = FORCE_PRIORITY_8;
+ dispatchData.gemmStyle = {2, arg.filterSize.x, 32, 32, 2, 1};
+ dispatchData.lws[1] = 8;
+ dispatchData.efficiency = FORCE_PRIORITY_8;
}
- size_t sgemm_m = RoundUp(arg.output.X().v * arg.output.Y().v, runInfo.gemmStyle.subBlockDimM);
- size_t sgemm_n = RoundUp(arg.output.Feature().v / arg.groups, runInfo.gemmStyle.subBlockDimN);
+ size_t sgemm_m = RoundUp(arg.output.X().v * arg.output.Y().v, dispatchData.gemmStyle.subBlockDimM);
+ size_t sgemm_n = RoundUp(arg.output.Feature().v / arg.groups, dispatchData.gemmStyle.subBlockDimN);
- runInfo.gws0 = RoundUp(CeilDiv(sgemm_n, runInfo.gemmStyle.globalWorkSizeDX), runInfo.lws0);
- runInfo.gws1 = RoundUp(CeilDiv(sgemm_m, runInfo.gemmStyle.globalWorkSizeDY), runInfo.lws1);
- runInfo.gws2 = arg.output.Batch().v * arg.groups;
+ dispatchData.gws[0] = RoundUp(CeilDiv(sgemm_n, dispatchData.gemmStyle.globalWorkSizeDX), dispatchData.lws[0]);
+ dispatchData.gws[1] = RoundUp(CeilDiv(sgemm_m, dispatchData.gemmStyle.globalWorkSizeDY), dispatchData.lws[1]);
+ dispatchData.gws[2] = arg.output.Batch().v * arg.groups;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_GEMMLike::Validate(const Params& p, const optional_params& o) const {
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override;
std::string GetKernelName(const convolution_params& params) const override;
bool NeedPaddedInput() const override { return true; }
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_iyxo::SetDefault(const convolution_params& cp, int) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
- runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 4;
- runInfo.gws1 = cp.output.Y().v;
- runInfo.gws2 = sub_group_size;
+ dispatchData.gws[0] = CeilDiv(cp.output.X().v, sub_group_size) / 4;
+ dispatchData.gws[1] = cp.output.Y().v;
+ dispatchData.gws[2] = sub_group_size;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_iyxo::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants ConvolutionKernel_bfyx_iyxo::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+JitConstants ConvolutionKernel_bfyx_iyxo::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2]));
return jit;
}
return WeightsLayout::iyxo;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
option.blockWidth = 4;
option.blockHeight = 3;
option.prefetch = 5;
- // run_info.efficiency = FORCE_PRIORITY_7; // GEMM is better
}
// if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_os_iyx_osv16::SetDefault(const convolution_params& cp,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
const auto of_maps = cp.output.Feature().v;
const auto of_maps_per_group = of_maps / cp.groups;
const size_t of_threads_per_batch = RoundUp(of_maps_per_group, sub_group_size) * cp.groups;
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
- runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
- runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
- runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+ dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+ dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch;
- auto input_block_dims = get_bfyx_req_input_block_dims(runInfo.cldnnStyle.blockWidth,
- runInfo.cldnnStyle.blockHeight,
+ auto input_block_dims = get_bfyx_req_input_block_dims(dispatchData.cldnnStyle.blockWidth,
+ dispatchData.cldnnStyle.blockHeight,
cp.filterSize,
cp.stride,
cp.dilation,
sub_group_size,
- runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2,
+ cp.output.GetDType() == Datatype::F16 ? sub_group_size : sub_group_size / 2,
sub_group_size);
- runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first;
- runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second;
+ dispatchData.cldnnStyle.inputBlockArraySize = input_block_dims.first;
+ dispatchData.cldnnStyle.inputBlockWidth = input_block_dims.second;
- runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
- runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight);
- runInfo.gws2 = of_threads_per_batch * cp.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth);
+ dispatchData.gws[1] = CeilDiv(cp.output.Y().v, dispatchData.cldnnStyle.blockHeight);
+ dispatchData.gws[2] = of_threads_per_batch * cp.output.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_os_iyx_osv16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_bfyx_os_iyx_osv16::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
const auto of_maps = params.output.Feature().v;
const auto of_maps_per_group = of_maps / params.groups;
const size_t of_threads_per_batch = RoundUp(of_maps_per_group, sub_group_size);
size_t leftovers = of_threads_per_batch - of_maps_per_group;
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetUnitType(params);
}
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight));
- jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize));
- jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth));
- jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2]));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight));
+ jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", dispatchData.cldnnStyle.inputBlockArraySize));
+ jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth));
+ jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch));
if (leftovers) {
jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers));
FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
option.blockWidth = 4;
option.blockHeight = 3;
option.prefetch = 5;
- // run_info.efficiency = FORCE_PRIORITY_7; // GEMM is better
}
// if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::SetDefault(const convolution_params& cp,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
const auto of_maps = cp.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
- runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
- runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
- runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+ dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+ dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch;
- auto input_block_dims = get_bfyx_req_input_block_dims(runInfo.cldnnStyle.blockWidth,
- runInfo.cldnnStyle.blockHeight,
+ auto input_block_dims = get_bfyx_req_input_block_dims(dispatchData.cldnnStyle.blockWidth,
+ dispatchData.cldnnStyle.blockHeight,
cp.filterSize,
cp.stride,
cp.dilation,
sub_group_size,
- runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2,
+ cp.output.GetDType() == Datatype::F16 ? sub_group_size : sub_group_size / 2,
sub_group_size);
- runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first;
- runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second;
+ dispatchData.cldnnStyle.inputBlockArraySize = input_block_dims.first;
+ dispatchData.cldnnStyle.inputBlockWidth = input_block_dims.second;
- runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
- runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight);
- runInfo.gws2 = 2 * of_threads_per_batch * cp.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth);
+ dispatchData.gws[1] = CeilDiv(cp.output.Y().v, dispatchData.cldnnStyle.blockHeight);
+ dispatchData.gws[2] = 2 * of_threads_per_batch * cp.output.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 2 * sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 2 * sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
const auto of_maps = params.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
size_t leftovers = of_threads_per_batch - of_maps;
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 16));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight));
- jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize));
- jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth));
- jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight));
+ jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", dispatchData.cldnnStyle.inputBlockArraySize));
+ jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth));
+ jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch));
if (leftovers) {
jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers));
protected:
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
std::vector<AutoTuneOption> autoTuneOptions = {};
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_to_bfyx_f16::SetDefault(const convolution_params& params,
int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto autoTune = GetAutoTuneOptions(params, autoTuneIndex);
- kd.cldnnStyle.blockWidth = autoTune.blockWidth;
+ dispatchData.cldnnStyle.blockWidth = autoTune.blockWidth;
auto x = out.X().v;
auto y = out.Y().v;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = CeilDiv(x, autoTune.blockWidth) * y;
- kd.gws1 = Align(f, sub_group_size);
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x, autoTune.blockWidth) * y;
+ dispatchData.gws[1] = Align(f, sub_group_size);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = sub_group_size;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
if (b == 1)
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
else
- kd.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_to_bfyx_f16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_bfyx_to_bfyx_f16::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
auto input = params.inputs[0];
auto output = params.output;
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
if (!params.fused_ops.empty()) {
auto input_dt = GetUnitType(params);
bool NeedPaddedInput() const override { return false; }
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
struct AutoTuneOption {
size_t blockWidth;
std::string exeMode;
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_to_bfyx_bsv16_fsv16::SetDefault(const convolution_params& params,
- int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernel_bfyx_to_bfyx_f16::SetDefault(params, autoTuneIndex);
+ int autoTuneIndex) const {
+ DispatchData dispatchData = ConvolutionKernel_bfyx_to_bfyx_f16::SetDefault(params, autoTuneIndex);
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_to_bfyx_bsv16_fsv16::Validate(const Params& p, const optional_params& o) const {
ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_to_fs_byx_fsv32::SetDefault(const convolution_params& arg,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg);
AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex);
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- runInfo.cldnnStyle.blockHeight = option.blockHeight;
- runInfo.cldnnStyle.blockWidth = option.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = option.blockHeight;
+ dispatchData.cldnnStyle.blockWidth = option.blockWidth;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 16;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 16;
- runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth);
- runInfo.gws1 = CeilDiv(arg.output.Y().v, option.blockHeight);
- runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth);
+ dispatchData.gws[1] = CeilDiv(arg.output.Y().v, option.blockHeight);
+ dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_bfyx_to_fs_byx_fsv32::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_bfyx_to_fs_byx_fsv32::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", kd.cldnnStyle.blockHeight));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight));
auto inputBlockWidth =
- getInputSize(params.stride.x, params.filterSize.x, params.dilation.x, kd.cldnnStyle.blockWidth);
+ getInputSize(params.stride.x, params.filterSize.x, params.dilation.x, dispatchData.cldnnStyle.blockWidth);
auto inputBlockHeight =
- getInputSize(params.stride.y, params.filterSize.y, params.dilation.y, kd.cldnnStyle.blockHeight);
+ getInputSize(params.stride.y, params.filterSize.y, params.dilation.y, dispatchData.cldnnStyle.blockHeight);
auto inputBlockWidthRound = RoundUp(inputBlockWidth, subGroupSize);
}
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32::SetDefault(const convolution_params& arg,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg);
AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex);
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- runInfo.cldnnStyle.blockHeight = 1;
- runInfo.cldnnStyle.blockWidth = option.blockWidth;
- runInfo.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth);
+ dispatchData.cldnnStyle.blockHeight = 1;
+ dispatchData.cldnnStyle.blockWidth = option.blockWidth;
+ dispatchData.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth);
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 16;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 16;
- runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth);
- runInfo.gws1 = arg.output.Y().v;
- runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth);
+ dispatchData.gws[1] = arg.output.Y().v;
+ dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_fs_byx_fsv32::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_fs_byx_fsv32::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
auto accumulator_type = GetAccumulatorType(params);
auto activation_type = GetAccumulatorType(params);
jit.Merge(MakeTypeJitConstants(accumulator_type, "ACCUMULATOR"));
jit.Merge(MakeTypeJitConstants(activation_type, "ACTIVATION"));
- jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", kd.cldnnStyle.inputBlockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
jit.AddConstant(MakeJitConstant("FSV", fsv));
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize));
jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread));
}
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32_1x1::SetDefault(const convolution_params& arg,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg);
AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex);
- runInfo.efficiency = FORCE_PRIORITY_4;
+ dispatchData.efficiency = FORCE_PRIORITY_4;
- runInfo.cldnnStyle.blockHeight = option.blockHeight;
- runInfo.cldnnStyle.blockWidth = option.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = option.blockHeight;
+ dispatchData.cldnnStyle.blockWidth = option.blockWidth;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 16;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 16;
- runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth);
- runInfo.gws1 = CeilDiv(arg.output.Y().v, option.blockHeight);
- runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth);
+ dispatchData.gws[1] = CeilDiv(arg.output.Y().v, option.blockHeight);
+ dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_fs_byx_fsv32_1x1::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_fs_byx_fsv32_1x1::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", kd.cldnnStyle.blockHeight));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight));
jit.AddConstant(MakeJitConstant("FSV", fsv));
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize));
jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread));
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32_depthwise::SetDefault(const convolution_params& arg,
- int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+ int autoTuneIndex) const {
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg);
AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex);
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- runInfo.cldnnStyle.blockHeight = 1;
- runInfo.cldnnStyle.blockWidth = option.blockWidth;
- runInfo.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth);
+ dispatchData.cldnnStyle.blockHeight = 1;
+ dispatchData.cldnnStyle.blockWidth = option.blockWidth;
+ dispatchData.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth);
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 16;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 16;
- runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth);
- runInfo.gws1 = arg.output.Y().v;
- runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth);
+ dispatchData.gws[1] = arg.output.Y().v;
+ dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_fs_byx_fsv32_depthwise::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_fs_byx_fsv32_depthwise::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", kd.cldnnStyle.inputBlockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
jit.AddConstant(MakeJitConstant("FSV", fsv));
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize));
jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread));
}
KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetTunedKernelsDataByIndex(const Params& params,
- const optional_params& options,
- const int autoTuneIndex) const {
+ const optional_params& options,
+ const int autoTuneIndex) const {
auto tuneOptions = GetAutoTuneOptions(params, autoTuneIndex);
return GetCommonKernelsData(params, options, tuneOptions.exeMode, autoTuneIndex);
}
}
KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetKernelsDataForAutoTune(const Params& params,
- const optional_params& options) const {
+ const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
}
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
return GetCommonKernelsData(params, options);
}
-JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
const auto& input = params.inputs[0];
const auto& output = params.output;
ConvolutionKernelBase::DispatchData ConvolutionKernel_imad::SetDefault(const convolution_params& params,
int) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& output = params.output;
const auto& weights = params.weights;
size_t otw, oth;
getOutBlock_WH(output.X().v, params.stride.x, weights.X().v, params.dilation.x, otw, oth);
- std::vector<size_t> global = {// number of tiles needed to cover output width
- CeilDiv(output.X().v, otw),
+ dispatchData.gws = { // number of tiles needed to cover output width
+ CeilDiv(output.X().v, otw),
- // number of tiles needed to cover output height
- CeilDiv(output.Y().v, oth),
+ // number of tiles needed to cover output height
+ CeilDiv(output.Y().v, oth),
- // round depth range up
- Align(weights.OFM().v, SIMD_SIZE) * params.groups * output.Batch().v};
+ // round depth range up
+ Align(weights.OFM().v, SIMD_SIZE) * params.groups * output.Batch().v };
- std::vector<size_t> local = {1, 1, SIMD_SIZE};
+ dispatchData.lws = {1, 1, SIMD_SIZE};
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+ dispatchData.cldnnStyle = {0, 0, 0, 0, 0};
+ dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0};
// This kernel is quite slow for 1x1 and KHx1 kernels
// TODO: check if we need any optimized kernels in this layout
// If yes, we need to implement some customization for these cases.
- kd.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- return kd;
+ return dispatchData;
} // SetDefault
bool ConvolutionKernel_imad::Validate(const Params& params, const optional_params& options) const {
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
WeightsLayout GetPreferredWeightsLayout(const convolution_params &p) const override {
}
ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::AutoTuneParams ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetAutoTuneParams(const convolution_params& params,
- int index) const {
+ int index) const {
AutoTuneParams tune_params;
bool selected = false;
if (index >= 0 && index < static_cast<int>(all_tune_params.size())) {
}
JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
- auto simd = kd.lws0;
- auto features_per_wi = kd.cldnnStyle.blockHeight;
- auto lwg_depth = kd.lws2;
- auto force_prefetch = kd.cldnnStyle.prefetch == 1;
+ auto simd = dispatchData.lws[0];
+ auto features_per_wi = dispatchData.cldnnStyle.blockHeight;
+ auto lwg_depth = dispatchData.lws[2];
+ auto force_prefetch = dispatchData.cldnnStyle.prefetch == 1;
mem_consts.AddConstant(MakeJitConstant("SIMD", simd));
mem_consts.AddConstant(MakeJitConstant("FEATURES_PER_WI", features_per_wi));
} // GetJitConstants
ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::SetDefault(const convolution_params& params,
- int autoTuneIndex) const {
- DispatchData kd;
+ int autoTuneIndex) const {
+ DispatchData dispatchData;
auto& out = params.output;
auto autoTuneParam = GetAutoTuneParams(params, autoTuneIndex);
auto simd = autoTuneParam.simd;
auto features_per_wi = autoTuneParam.features_per_wi;
- std::vector<size_t> global = { RoundUp(out.X().v * out.Y().v, simd), CeilDiv(out.Feature().v, features_per_wi), out.Batch().v * lwg_depth };
- std::vector<size_t> local = { simd, 1, lwg_depth};
+ dispatchData.gws = { RoundUp(out.X().v * out.Y().v, simd), CeilDiv(out.Feature().v, features_per_wi), out.Batch().v * lwg_depth };
+ dispatchData.lws = { simd, 1, lwg_depth};
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.gemmStyle = { 0, 0, 0, 0, 0, 0 };
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.cldnnStyle.blockHeight = features_per_wi;
+ dispatchData.cldnnStyle.blockWidth = simd;
+ dispatchData.cldnnStyle.prefetch = autoTuneParam.force_prefetch ? 1 : 0;
- kd.gemmStyle = { 0, 0, 0, 0, 0, 0 };
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- kd.cldnnStyle.blockHeight = features_per_wi;
- kd.cldnnStyle.blockWidth = simd;
- kd.cldnnStyle.prefetch = autoTuneParam.force_prefetch ? 1 : 0;
-
- kd.efficiency = FORCE_PRIORITY_1;
-
- return kd;
+ return dispatchData;
} // SetDefault
KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetTunedKernelsDataByIndex(const Params& params,
- const optional_params& options,
- int autoTuneIndex) const {
+ const optional_params& options,
+ int autoTuneIndex) const {
auto convParams = static_cast<const convolution_params&>(params);
auto tuneParams = GetAutoTuneParams(convParams, autoTuneIndex);
return GetCommonKernelsData(params, options, tuneParams.exeMode, autoTuneIndex);
}
KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetKernelsDataForAutoTune(const Params& params,
- const optional_params& options) const {
+ const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
}
JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
size_t filter_block_size = 4;
size_t min_blocked_leftovers = 4;
}
mem_consts.AddConstant(MakeJitConstant("FILTER_BLOCKED", filter_blocked));
- auto& work_mode = kd.cldnnStyle.prefetch;
+ auto& work_mode = dispatchData.cldnnStyle.prefetch;
bool tiled = (work_mode & mode::tiled) != 0;
bool preload_input = (work_mode & mode::preload_input) != 0;
bool preload_weights = (work_mode & mode::preload_weights) != 0;
if (tiled) {
preload_weights = true;
- simd = kd.lws0;
- tile_x = kd.cldnnStyle.blockWidth;
- tile_y = kd.cldnnStyle.blockHeight;
+ simd = dispatchData.lws[0];
+ tile_x = dispatchData.cldnnStyle.blockWidth;
+ tile_y = dispatchData.cldnnStyle.blockHeight;
input_line_size = 1;
output_block_x = 1;
} else if (preload_input) {
tile_x = 1;
- tile_y = kd.cldnnStyle.blockHeight;
- output_block_x = kd.cldnnStyle.blockWidth;
+ tile_y = dispatchData.cldnnStyle.blockHeight;
+ output_block_x = dispatchData.cldnnStyle.blockWidth;
input_line_size = (output_block_x - 1) * params.stride.x + (params.weights.X().v - 1) * params.dilation.x + 1;
} else {
tile_x = 1;
tile_y = 1;
input_line_size = 1;
- output_block_x = kd.cldnnStyle.blockWidth;
+ output_block_x = dispatchData.cldnnStyle.blockWidth;
}
mem_consts.AddConstant(MakeJitConstant("TILED", tiled));
ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::SetDefault(const convolution_params& params,
int autoTuneIndex) const {
- DispatchData kd;
+ DispatchData dispatchData;
auto& out = params.output;
auto autoTuneParam = GetAutoTuneParams(params, autoTuneIndex);
global_x = global_x * autoTuneParam.tiled_simd;
}
- std::vector<size_t> global = { global_x, global_y, CeilDiv(out.Feature().v, fsv) * out.Batch().v };
- std::vector<size_t> local = { 1, 1, 1 };
+ dispatchData.gws = { global_x, global_y, CeilDiv(out.Feature().v, fsv) * out.Batch().v };
+ dispatchData.lws = { 1, 1, 1 };
if (autoTuneParam.tiled) {
- local[0] = autoTuneParam.tiled_simd;
+ dispatchData.lws[0] = autoTuneParam.tiled_simd;
} else {
- local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
}
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.gemmStyle = { 0, 0, 0, 0, 0, 0 };
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.cldnnStyle.blockWidth = autoTuneParam.block_x;
+ dispatchData.cldnnStyle.blockHeight = autoTuneParam.block_y;
+ dispatchData.cldnnStyle.prefetch = (static_cast<size_t>(autoTuneParam.tiled) * mode::tiled)
+ | (static_cast<size_t>(autoTuneParam.preload_input) * mode::preload_input)
+ | (static_cast<size_t>(autoTuneParam.preload_weights) * mode::preload_weights);
- kd.gemmStyle = { 0, 0, 0, 0, 0, 0 };
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- kd.cldnnStyle.blockWidth = autoTuneParam.block_x;
- kd.cldnnStyle.blockHeight = autoTuneParam.block_y;
- kd.cldnnStyle.prefetch = (static_cast<size_t>(autoTuneParam.tiled) * mode::tiled)
- | (static_cast<size_t>(autoTuneParam.preload_input) * mode::preload_input)
- | (static_cast<size_t>(autoTuneParam.preload_weights) * mode::preload_weights);
-
- kd.efficiency = FORCE_PRIORITY_1;
-
- return kd;
+ return dispatchData;
} // SetDefault
KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetTunedKernelsDataByIndex(const Params& params,
}
KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetKernelsDataForAutoTune(const Params& params,
- const optional_params& options) const {
+ const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return false; }
return GetCommonKernelsData(params, options);
}
-JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
FusedOpsConfiguration conf_scalar = {"",
} // GetJitConstants
ConvolutionKernelBase::DispatchData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::SetDefault(const convolution_params& params, int) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& output = params.output;
- std::vector<size_t> global = {output.X().v, output.Y().v, output.Feature().v / 32 * output.Batch().v};
- std::vector<size_t> local = {1, 1, SIMD_SIZE};
+ dispatchData.gws = { output.X().v, output.Y().v, output.Feature().v / 32 * output.Batch().v };
+ dispatchData.lws = { 1, 1, SIMD_SIZE};
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.cldnnStyle = {0, 0, 0, 0, 0};
+ dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0};
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
-
- kd.efficiency = FORCE_PRIORITY_2;
-
- return kd;
+ return dispatchData;
} // SetDefault
bool Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::Validate(const Params& params, const optional_params& options) const {
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
return GetCommonKernelsData(params, options);
}
-JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
+JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ auto mem_consts = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
} // GetJitConstants
ConvolutionKernelBase::DispatchData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::SetDefault(const convolution_params& params, int) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& output = params.output;
- std::vector<size_t> global = {output.X().v, output.Y().v, output.Feature().v / 16 * output.Batch().v};
- std::vector<size_t> local = {1, 1, SIMD_SIZE};
+ dispatchData.gws = { output.X().v, output.Y().v, output.Feature().v / 16 * output.Batch().v };
+ dispatchData.lws = { 1, 1, SIMD_SIZE };
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.cldnnStyle = {0, 0, 0, 0, 0};
+ dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0};
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
-
- kd.efficiency = FORCE_PRIORITY_2;
-
- return kd;
+ return dispatchData;
} // SetDefault
bool Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::Validate(const Params& params, const optional_params& options) const {
protected:
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return true; }
WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_b_fs_yx_fsv32::SetDefault(const convolution_params& cp,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
- runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
- runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
- runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+ dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+ dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch;
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
size_t ow_group = 8;
while (ow_group > 1) {
- if (CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth) % ow_group == 0)
+ if (CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth) % ow_group == 0)
break;
ow_group--;
}
- runInfo.gws0 = Align(cp.output.Feature().v, 32) / 4;
- runInfo.gws1 = Align(CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth), ow_group) * cp.output.Y().v * cp.output.Z().v;
- runInfo.gws2 = cp.output.Batch().v;
+ dispatchData.gws[0] = Align(cp.output.Feature().v, 32) / 4;
+ dispatchData.gws[1] = Align(CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth), ow_group) * cp.output.Y().v * cp.output.Z().v;
+ dispatchData.gws[2] = cp.output.Batch().v;
- runInfo.lws0 = 8;
- runInfo.lws1 = ow_group;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 8;
+ dispatchData.lws[1] = ow_group;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_mmad_b_fs_yx_fsv32::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("OW_GROUP", runInfo.lws1));
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws0));
+ jit.AddConstant(MakeJitConstant("OW_GROUP", dispatchData.lws[1]));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]));
jit.AddConstant(MakeJitConstant("OSV_SIZE", 32));
jit.AddConstant(MakeJitConstant("ISV_SIZE", 32));
- jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", runInfo.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", dispatchData.cldnnStyle.blockWidth));
jit.AddConstant(MakeJitConstant("IFM_BLOCKS", CeilDiv(params.inputs[0].Feature().v, 32)));
auto input = params.inputs[0];
auto output = params.output;
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
size_t input_line_size = params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1)*params.dilation.x + 1;
jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", blockWidth));
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
bool NeedPaddedInput() const override { return false; }
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::SetDefault(const convolution_params& cp,
int /*autoTuneIndex*/) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- std::vector<size_t> global = {cp.output.Feature().v, cp.output.X().v * cp.output.Y().v, cp.output.Batch().v};
+ dispatchData.gws = { cp.output.Feature().v, cp.output.X().v * cp.output.Y().v, cp.output.Batch().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, cp.engineInfo);
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- auto local = GetOptimalLocalWorkGroupSizes(global, cp.engineInfo);
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
// TODO: optimize this kernel
JitConstants ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
protected:
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::goiyx;
}
}
ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::AutoTuneOption ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetAutoTuneOptions(const Params &p,
- int autoTuneIndex) const {
+ int autoTuneIndex) const {
if ((autoTuneIndex >= 0) && (autoTuneIndex < static_cast<int>(autoTuneOptions.size()))) {
return autoTuneOptions[autoTuneIndex];
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::SetDefault(const convolution_params &cp,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
- runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
- runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
- runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+ dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+ dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch;
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
const size_t max_lws = std::max((size_t)1, cp.engineInfo.maxWorkGroupSize / sub_group_size);
- runInfo.gws0 = Align(cp.output.Feature().v, 32) / 2;
- runInfo.gws1 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
- runInfo.gws2 = cp.output.Batch().v * cp.output.Y().v * cp.output.Z().v;
+ dispatchData.gws[0] = Align(cp.output.Feature().v, 32) / 2;
+ dispatchData.gws[1] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth);
+ dispatchData.gws[2] = cp.output.Batch().v * cp.output.Y().v * cp.output.Z().v;
- runInfo.lws0 = sub_group_size;
- runInfo.lws1 = get_lws(cp, runInfo.gws1, tuneOptions.blockWidth, max_lws);
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = get_lws(cp, dispatchData.gws[1], tuneOptions.blockWidth, max_lws);
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetJitConstants(const convolution_params ¶ms,
- const DispatchData &runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData &dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws0));
- jit.AddConstant(MakeJitConstant("LWS0", runInfo.lws0));
- jit.AddConstant(MakeJitConstant("LWS1", runInfo.lws1));
- jit.AddConstant(MakeJitConstant("LWS2", runInfo.lws2));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]));
+ jit.AddConstant(MakeJitConstant("LWS0", dispatchData.lws[0]));
+ jit.AddConstant(MakeJitConstant("LWS1", dispatchData.lws[1]));
+ jit.AddConstant(MakeJitConstant("LWS2", dispatchData.lws[2]));
jit.AddConstant(MakeJitConstant("OSV", 32));
- jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", runInfo.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", dispatchData.cldnnStyle.blockWidth));
auto input = params.inputs[0];
auto output = params.output;
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
- size_t slm_line_size = params.stride.x * (runInfo.lws1 * blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1;
- size_t slm_chunk_size = slm_line_size / runInfo.lws1;
- size_t slm_tail = slm_line_size % runInfo.lws1;
- size_t slm_line_aligned = slm_chunk_size*runInfo.lws1 + Align(slm_tail, sub_group_size);
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
+ size_t slm_line_size = params.stride.x * (dispatchData.lws[1] * blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1;
+ size_t slm_chunk_size = slm_line_size / dispatchData.lws[1];
+ size_t slm_tail = slm_line_size % dispatchData.lws[1];
+ size_t slm_line_aligned = slm_chunk_size*dispatchData.lws[1] + Align(slm_tail, sub_group_size);
size_t input_line_size = params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1;
jit.AddConstant(MakeJitConstant("INPUT_LINE_SIZE", input_line_size));
jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", blockWidth));
- jit.AddConstant(MakeJitConstant("GROUP_SIZE", blockWidth * runInfo.lws1));
+ jit.AddConstant(MakeJitConstant("GROUP_SIZE", blockWidth * dispatchData.lws[1]));
jit.AddConstant(MakeJitConstant("SLM_LINE_SIZE", slm_line_aligned));
jit.AddConstant(MakeJitConstant("SLM_CHUNK_SIZE", slm_chunk_size));
jit.AddConstant(MakeJitConstant("SLM_TAIL", slm_tail));
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &p) const override {
if (p.output.GetDType() == Datatype::F16 || p.output.GetDType() == Datatype::F32 ||
}
ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::SetDefault(const convolution_params &cp,
- int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+ int autoTuneIndex) const {
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp);
auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
- runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
- runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
- runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+ dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+ dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch;
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- runInfo.gws0 = Align(cp.output.Feature().v, 32) / 2;
- runInfo.gws1 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth) * cp.output.Y().v;
- runInfo.gws2 = cp.output.Batch().v;
+ dispatchData.gws[0] = Align(cp.output.Feature().v, 32) / 2;
+ dispatchData.gws[1] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth) * cp.output.Y().v;
+ dispatchData.gws[2] = cp.output.Batch().v;
- runInfo.lws0 = 16;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetJitConstants(const convolution_params ¶ms,
- const DispatchData &runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData &dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws0));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]));
jit.AddConstant(MakeJitConstant("OSV", 32));
jit.AddConstant(MakeJitConstant("ISV", 32));
- jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", runInfo.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", dispatchData.cldnnStyle.blockWidth));
jit.AddConstant(MakeJitConstant("IFM_BLOCKS", CeilDiv(params.inputs[0].Feature().v, 32)));
auto input = params.inputs[0];
auto output = params.output;
- auto blockWidth = runInfo.cldnnStyle.blockWidth;
+ auto blockWidth = dispatchData.cldnnStyle.blockWidth;
size_t input_line_size = std::min(params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1,
input.X().v + input.X().pad.Total());
}
KernelsData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetKernelsDataForAutoTune(const Params ¶ms,
- const optional_params &options) const {
+ const optional_params &options) const {
if (!Validate(params, options)) {
return {};
}
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::os_is_yx_osv32_isv4_swizzled_by_2;
/*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return GetTunedKernelsDataByIndex(params, options);
}
-JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
- JitConstants jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
Datatype accumulator_dt;
Datatype activation_dt;
ConvolutionKernelBase::DispatchData ConvolutionKernel_Ref::SetDefault(const convolution_params& params,
int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
// FIXME: ConvolutionKernelBase::SetDefault should probably be pure and
// not setting these at all as it's something specific to a concrete
// Just set the correct value for a particular implementation here,
// until the whole hierarchy is re-written.
const auto& out = params.output;
- std::vector<size_t> global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
-
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
- return kd;
+ dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
+ return dispatchData;
}
bool ConvolutionKernel_Ref::Validate(const Params& params, const optional_params& options) const {
FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
bool Validate(const Params& params, const optional_params& options) const override;
};
}
JitConstants ConvolutionKernel_Winograd_2x3_s1::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const size_t input_tile_width = winograd_input_tile_width;
const size_t input_tile_height = winograd_input_tile_height;
return jit;
}
-ConvolutionKernel_Winograd_2x3_s1::Parent::DispatchData ConvolutionKernel_Winograd_2x3_s1::SetDefault(
- const convolution_params& arg,
- int) const {
- Parent::DispatchData runInfo = Parent::SetDefault(arg);
+ConvolutionKernel_Winograd_2x3_s1::Parent::DispatchData ConvolutionKernel_Winograd_2x3_s1::SetDefault(const convolution_params& arg,
+ int) const {
+ Parent::DispatchData dispatchData = Parent::SetDefault(arg);
const size_t tile_n = winograd_tile_n; // goes in-depth
const size_t tile_m = winograd_tile_m; // goes over flattened x and y
// width by tile's width to get tiles count
const size_t nr_tiles_y = Align(arg.output.Y().v, 8) / input_tile_height;
- runInfo.gws0 = arg.output.Feature().v / tile_n;
- runInfo.gws1 = nr_tiles_x * nr_tiles_y / tile_m;
- runInfo.gws2 = input_tile_width * input_tile_height * arg.inputs[0].Batch().v;
+ dispatchData.gws[0] = arg.output.Feature().v / tile_n;
+ dispatchData.gws[1] = nr_tiles_x * nr_tiles_y / tile_m;
+ dispatchData.gws[2] = input_tile_width * input_tile_height * arg.inputs[0].Batch().v;
- runInfo.lws0 = 8;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 8;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- runInfo.efficiency = FORCE_PRIORITY_4;
+ dispatchData.efficiency = FORCE_PRIORITY_4;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_Winograd_2x3_s1::Validate(const Params& p, const optional_params& o) const {
return WeightsLayout::winograd_2x3_s1_weights;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
JitConstants ConvolutionKernel_Winograd_2x3_s1_fused::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const auto idepth = params.inputs[0].Feature().v;
const auto input_pad_y = params.inputs[0].Y().pad.before + params.inputs[0].Y().pad.after;
ConvolutionKernel_Winograd_2x3_s1_fused::Parent::DispatchData ConvolutionKernel_Winograd_2x3_s1_fused::SetDefault(
const convolution_params& arg,
int) const {
- Parent::DispatchData runInfo = Parent::SetDefault(arg);
+ Parent::DispatchData dispatchData = Parent::SetDefault(arg);
const auto odepth = arg.output.Feature().v;
const auto input_pad_y = arg.inputs[0].Y().pad.before + arg.inputs[0].Y().pad.after;
auto K = odepth;
auto N = 1;
- uint32_t global_step[3] = {14, 4, 16 * 8};
- uint32_t local_size[3] = {8, 2, 8};
+ size_t global_step[3] = {14, 4, 16 * 8};
+ size_t local_size[3] = {8, 2, 8};
- uint32_t zStep = local_size[2];
- runInfo.gws0 = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0];
- runInfo.gws1 = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1];
- runInfo.gws2 = ((uint32_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * zStep;
+ size_t zStep = local_size[2];
+ dispatchData.gws[0] = ((size_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0];
+ dispatchData.gws[1] = ((size_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1];
+ dispatchData.gws[2] = ((size_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * zStep;
- runInfo.lws0 = local_size[0];
- runInfo.lws1 = local_size[1];
- runInfo.lws2 = local_size[2];
+ dispatchData.lws[0] = local_size[0];
+ dispatchData.lws[1] = local_size[1];
+ dispatchData.lws[2] = local_size[2];
- runInfo.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_Winograd_2x3_s1_fused::Validate(const Params& p, const optional_params& o) const {
const optional_params& options) const {
return GetTunedKernelsDataByIndex(params, options);
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
return WeightsLayout::winograd_2x3_s1_fused_weights;
}
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
JitConstants ConvolutionKernel_Winograd_6x3_s1_fused::GetJitConstants(const convolution_params& params,
- const DispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const auto idepth = params.inputs[0].Feature().v;
const auto input_pad_y = params.inputs[0].Y().pad.before + params.inputs[0].Y().pad.after;
ConvolutionKernel_Winograd_6x3_s1_fused::Parent::DispatchData ConvolutionKernel_Winograd_6x3_s1_fused::SetDefault(
const convolution_params& arg,
int) const {
- Parent::DispatchData runInfo = Parent::SetDefault(arg);
+ Parent::DispatchData dispatchData = Parent::SetDefault(arg);
const auto odepth = arg.output.Feature().v;
const auto input_pad_y = arg.inputs[0].Y().pad.before + arg.inputs[0].Y().pad.after;
uint32_t global_step[3] = {14, 6, 16 * 8};
uint32_t local_size[3] = {16, 1, 8};
- runInfo.gws0 = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0];
- runInfo.gws1 = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1];
- runInfo.gws2 = ((uint32_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * local_size[2];
+ dispatchData.gws[0] = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0];
+ dispatchData.gws[1] = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1];
+ dispatchData.gws[2] = ((uint32_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * local_size[2];
- runInfo.lws0 = local_size[0];
- runInfo.lws1 = local_size[1];
- runInfo.lws2 = local_size[2];
+ dispatchData.lws[0] = local_size[0];
+ dispatchData.lws[1] = local_size[1];
+ dispatchData.lws[2] = local_size[2];
- runInfo.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_Winograd_6x3_s1_fused::Validate(const Params& p, const optional_params& o) const {
ParamsKey GetSupportedKey() const override;
protected:
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b16::SetDefault(const convolution_params& arg,
int) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg);
const auto filter_ofm_num = arg.weights.OFM().v * arg.weights.G().v;
const auto batch_size = arg.output.Batch().v;
const size_t ofmPerWorkItem = GetOfmPerWorkitem(arg.inputs[0].GetDType());
if (arg.inputs[0].GetDType() == Datatype::F16) {
- runInfo.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
} else {
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
}
- runInfo.lws0 = min_lws;
- runInfo.gws0 = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem);
+ dispatchData.lws[0] = min_lws;
+ dispatchData.gws[0] = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem);
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_yxfb_yxio_b16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_yxfb_yxio_b16::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto jit = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- const auto local_work_group_size = kd.lws0;
+ const auto local_work_group_size = dispatchData.lws[0];
const auto batch_size = params.output.Batch().v;
if (params.inputs[0].GetDType() == Datatype::F32) {
const size_t ofmPerWorkItem = GetOfmPerWorkitem(params.inputs[0].GetDType());
jit.AddConstants({
- MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0),
+ MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]),
MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem),
MakeJitConstant("BATCHES_PER_WORK_ITEM",
batchesPerWorkItem), // how many batches will a single work item compute
}
std::string GetKernelName(const convolution_params&) const override;
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b1_block::SetDefault(const convolution_params& arg,
int) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg);
// TODO: fill the proper data here (I don't know where can I locate it).
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_yxfb_yxio_b1_block::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
- cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0));
+ cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]));
return cldnn_jit;
}
ParamsKey GetSupportedKey() const override;
protected:
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::yxio;
}
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x::SetDefault(
const convolution_params& arg,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex);
const auto filter_ofm_num = arg.weights.OFM().v;
const auto batch_size = arg.output.Batch().v;
- runInfo.lws0 = local_work_size;
+ dispatchData.lws[0] = local_work_size;
// We cannot return 8 because we are processing 4 spatial coordinates for batch1,
// and if we use more than 4 ofm_per_work_item we downgrade simd16 to simd8 which would break this algorithm.
// TODO: experiment with SIMD8 version of algorithm and check if it could be faster
/*if (output_feature_count % (lws * 8) == 0)
{
- run_info.ofm_per_work_item = 8;
- run_info.gws1 = static_cast<size_t>(std::ceil(static_cast<float>(run_info.gws1) / 2.0f));
+ dispatchData.ofm_per_work_item = 8;
+ dispatchData.gws[1] = static_cast<size_t>(std::ceil(static_cast<float>(dispatchData.gws[1]) / 2.0f));
}
else*/
const size_t ofmPerWorkItem = GetOfmPerWorkitem(filter_ofm_num, local_work_size);
if (ofmPerWorkItem == 4) {
// We compute multiple spatial coordinates "x" in a single workitem that's why we must divide
- runInfo.gws1 = static_cast<size_t>(std::ceil(static_cast<float>(runInfo.gws1) / 4.0f));
+ dispatchData.gws[1] = static_cast<size_t>(std::ceil(static_cast<float>(dispatchData.gws[1]) / 4.0f));
} else if (ofmPerWorkItem == 2) {
- runInfo.gws1 = static_cast<size_t>(std::ceil(static_cast<float>(runInfo.gws1) / 8.0f));
+ dispatchData.gws[1] = static_cast<size_t>(std::ceil(static_cast<float>(dispatchData.gws[1]) / 8.0f));
} else {
- runInfo.gws1 = static_cast<size_t>(std::ceil(static_cast<float>(runInfo.gws1) / 8.0f));
+ dispatchData.gws[1] = static_cast<size_t>(std::ceil(static_cast<float>(dispatchData.gws[1]) / 8.0f));
}
- runInfo.gws0 = filter_ofm_num * batch_size / ofmPerWorkItem;
+ dispatchData.gws[0] = filter_ofm_num * batch_size / ofmPerWorkItem;
- return runInfo;
+ return dispatchData;
}
JitConstants ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, local_work_size);
cldnn_jit.AddConstant(MakeJitConstant("USE_VECTOR", ofmPerWorkItem));
cldnn_jit.AddConstant(MakeJitConstant(
"OFM_PER_WORK_ITEM",
ofmPerWorkItem)); // how many output feature maps for a single batch will a single work item produce
- cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0));
+ cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]));
return cldnn_jit;
}
return WeightsLayout::yxio;
}
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b8::SetDefault(const convolution_params& arg,
int autoTuneIndex) const {
- DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex);
const auto filterOfmNum = arg.weights.OFM().v;
const auto batchSize = arg.output.Batch().v;
- runInfo.lws0 = batchSize == 8 ? 8 : 16;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = batchSize == 8 ? 8 : 16;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- size_t ofmPerWorkItem = GetOfmPerWorkitem(filterOfmNum, batchSize, runInfo.lws0);
+ size_t ofmPerWorkItem = GetOfmPerWorkitem(filterOfmNum, batchSize, dispatchData.lws[0]);
- runInfo.gws0 = filterOfmNum * batchSize / ofmPerWorkItem;
+ dispatchData.gws[0] = filterOfmNum * batchSize / ofmPerWorkItem;
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
- return runInfo;
+ return dispatchData;
}
bool ConvolutionKernel_yxfb_yxio_b8::Validate(const Params& p, const optional_params& o) const {
}
JitConstants ConvolutionKernel_yxfb_yxio_b8::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- JitConstants jits = ConvolutionKernelBase::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ JitConstants jits = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
- size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, params.output.Batch().v, kd.lws0);
+ size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, params.output.Batch().v, dispatchData.lws[0]);
jits.AddConstant(MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem));
- jits.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0));
+ jits.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]));
return jits;
}
ParamsKey GetSupportedKey() const override;
protected:
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
return WeightsLayout::yxio;
}
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
DeformableConvolutionKernel_bfyx_conv::DispatchData DeformableConvolutionKernel_bfyx_conv::SetDefault(const convolution_params& params,
int autoTuneIndex) const {
- DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
+ DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = CeilDiv(x * y, 16);
- kd.gws1 = Align(f, 16);
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x * y, 16);
+ dispatchData.gws[1] = Align(f, 16);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = 16;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 16;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
JitConstants DeformableConvolutionKernel_bfyx_conv::GetJitConstants(const convolution_params& params,
- const DispatchData& /*kd*/) const {
+ const DispatchData& /*dispatchData*/) const {
JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", 16));
jit.AddConstant(MakeJitConstant("INPUT_CHANNELS", params.inputs[0].Feature().v / params.weights.X().v / params.weights.Y().v));
protected:
DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override;
ParamsKey GetSupportedKey() const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
return WeightsLayout::os_is_yx_isv16_osv16;
}
CommonDispatchData DeformableConvolutionKernel_bfyx_interp::SetDefault(const convolution_params& params) const {
- CommonDispatchData kd;
+ CommonDispatchData dispatchData;
const auto& out = params.output;
auto b = out.Batch().v;
auto kernel_size = params.kernelSize.x * params.kernelSize.y;
- kd.gws0 = Align(x * y, 16);
- kd.gws1 = params.deformable_groups * b;
- kd.gws2 = kernel_size;
+ dispatchData.gws[0] = Align(x * y, 16);
+ dispatchData.gws[1] = params.deformable_groups * b;
+ dispatchData.gws[2] = kernel_size;
- kd.lws0 = 16;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
KernelData kd = KernelData::Default<convolution_params>(params);
convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
- CommonDispatchData runInfo = SetDefault(newParams);
+ CommonDispatchData dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT,
false, false, static_cast<int>(newParams.inputs.size()));
return {kd};
}
CTCGreedyDecoderKernelBase::DispatchData CTCGreedyDecoderKernelBase::SetDefault(const ctc_greedy_decoder_params& params) const {
- DispatchData kd;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ DispatchData dispatchData;
- std::vector<size_t> global = { 1, 1, 1 };
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { 1, 1, 1 };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData CTCGreedyDecoderKernelBase::GetCommonKernelsData(const Params& params,
- const optional_params& options,
- float estimated_time) const {
+ const optional_params& options,
+ float estimated_time) const {
assert(params.GetType() == KernelType::CTC_GREEDY_DECODER);
if (!Validate(params, options))
const ctc_greedy_decoder_params& orgParams = static_cast<const ctc_greedy_decoder_params&>(params);
- DispatchData runInfo;
-
- runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<ctc_greedy_decoder_params>(params);
- auto cldnn_jit = GetJitConstants(orgParams, runInfo);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
using DispatchData = CommonDispatchData;
protected:
- virtual JitConstants GetJitConstants(const ctc_greedy_decoder_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const ctc_greedy_decoder_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const ctc_greedy_decoder_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
};
}
CumSumKernelBase::DispatchData CumSumKernelBase::SetDefault(const cum_sum_params& params) const {
- DispatchData runInfo;
- std::vector<size_t> global = {params.output.Batch().v,
- params.output.Feature().v * params.output.W().v,
- params.output.Z().v * params.output.Y().v * params.output.X().v};
+ DispatchData dispatchData;
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v * params.output.W().v,
+ params.output.Z().v * params.output.Y().v * params.output.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params,
return {};
}
- auto runInfo = SetDefault(newParams);
+ auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
- auto cldnn_jit = GetJitConstants(newParams, runInfo);
+ auto cldnn_jit = GetJitConstants(newParams, dispatchData);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
int32_t GetCumSumAxisIndex(const cum_sum_params& params) const;
size_t GetRealAxisIndex(const cum_sum_params& params) const;
ParamsKey GetSupportedKey() const override;
- virtual JitConstants GetJitConstants(const cum_sum_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const cum_sum_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
bool Validate(const Params&, const optional_params&) const override;
static constexpr size_t simd = 16;
static constexpr size_t BLOCK_SIZE = 16;
-JitConstants CumSumKernelPartialSum::GetJitConstants(const cum_sum_params& params, DispatchData kd) const {
- auto jits = CumSumKernelBase::GetJitConstants(params, kd);
+JitConstants CumSumKernelPartialSum::GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const {
+ auto jits = CumSumKernelBase::GetJitConstants(params, dispatchData);
auto activation_dt = GetActivationType(params);
jits.Merge(MakeTypeJitConstants(activation_dt, "PARTIAL"));
jits.AddConstant(MakeJitConstant("SIMD", simd));
- jits.AddConstant(MakeJitConstant("LWS", kd.lws0));
+ jits.AddConstant(MakeJitConstant("LWS", dispatchData.lws[0]));
jits.AddConstant(MakeJitConstant("BLOCK_SIZE", BLOCK_SIZE));
- jits.AddConstant(MakeJitConstant("SUM_ITEMS_NUM", kd.sum_items_num));
+ jits.AddConstant(MakeJitConstant("SUM_ITEMS_NUM", dispatchData.sum_items_num));
return jits;
}
KernelData kd = KernelData::Default<cum_sum_params>(params, kernels_num);
const cum_sum_params& newParams = *static_cast<cum_sum_params*>(kd.params.get());
- auto runInfo = SetDefaultForMulti(newParams);
+ auto dispatchData = SetDefaultForMulti(newParams);
{
// partial sum
- auto cldnn_jit = GetJitConstants(newParams, runInfo.stage_1);
+ auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_1);
cldnn_jit.AddConstant(MakeJitConstant("CUM_SUM_PARTIAL_SUM", 1));
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo.stage_1, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData.stage_1, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.clear(); // Clear original output argument
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
{
// Final
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
- auto cldnn_jit = GetJitConstants(newParams, runInfo.stage_final);
+ auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_final);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[1];
- FillCLKernelData(kernel, runInfo.stage_final, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData.stage_final, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.clear(); // Clear original output argument
kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
}
CumSumKernelPartialSum::MultiDispatchData CumSumKernelPartialSum::SetDefaultForMulti(const cum_sum_params& params) const {
- MultiDispatchData md;
+ MultiDispatchData dispatchData;
std::vector<size_t> dims = {params.output.Batch().v,
params.output.Feature().v,
params.output.W().v,
}
}
- md.stage_1.gws0 = Align(gws[0], BLOCK_SIZE);
- md.stage_1.gws1 = gws[1];
- md.stage_1.gws2 = gws[2];
- md.stage_1.lws0 = BLOCK_SIZE;
- md.stage_1.lws1 = 1;
- md.stage_1.lws2 = 1;
- md.stage_1.sum_items_num = items_num;
-
- md.stage_final.gws0 = gws[0];
- md.stage_final.gws1 = gws[1];
- md.stage_final.gws2 = gws[2];
- md.stage_final.lws0 = 1;
- md.stage_final.lws1 = 1;
- md.stage_final.lws2 = 1;
- md.stage_final.sum_items_num = Align(items_num, BLOCK_SIZE);
-
- return md;
+ dispatchData.stage_1.gws[0] = Align(gws[0], BLOCK_SIZE);
+ dispatchData.stage_1.gws[1] = gws[1];
+ dispatchData.stage_1.gws[2] = gws[2];
+ dispatchData.stage_1.lws[0] = BLOCK_SIZE;
+ dispatchData.stage_1.lws[1] = 1;
+ dispatchData.stage_1.lws[2] = 1;
+ dispatchData.stage_1.sum_items_num = items_num;
+
+ dispatchData.stage_final.gws = gws;
+ dispatchData.stage_final.lws = { 1, 1, 1 };
+ dispatchData.stage_final.sum_items_num = Align(items_num, BLOCK_SIZE);
+
+ return dispatchData;
}
KernelsData CumSumKernelPartialSum::GetKernelsData(const Params& params, const optional_params& options) const {
DispatchData stage_final;
};
- JitConstants GetJitConstants(const cum_sum_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const override;
KernelsData GetMultiStageKernelsData(const Params& params, const optional_params&, float estimated_time) const;
MultiDispatchData SetDefaultForMulti(const cum_sum_params& params) const;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
#include <vector>
namespace kernel_selector {
-JitConstants CumSumKernelRef::GetJitConstants(const cum_sum_params& params, DispatchData kd) const {
- auto jits = CumSumKernelBase::GetJitConstants(params, kd);
+JitConstants CumSumKernelRef::GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const {
+ auto jits = CumSumKernelBase::GetJitConstants(params, dispatchData);
jits.AddConstant(MakeJitConstant("AXIS_LAYOUT_INDEX", GetCumSumAxisIndex(params)));
CumSumKernelRef() : CumSumKernelBase("cum_sum_ref") {}
virtual ~CumSumKernelRef() = default;
protected:
- JitConstants GetJitConstants(const cum_sum_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector
}
DeconvolutionKernelBase::DispatchData DeconvolutionKernel_b_fs_zyx_fsv16::SetDefault(const deconvolution_params& params) const {
- DispatchData kd = DeconvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = DeconvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
if (ver_bsv16_fsv16) {
if (params.depthwise_separable_opt) {
- kd.gws0 = x * y * z;
- kd.gws1 = f;
- kd.gws2 = b / 16;
+ dispatchData.gws[0] = x * y * z;
+ dispatchData.gws[1] = f;
+ dispatchData.gws[2] = b / 16;
- kd.lws0 = 1;
- kd.lws1 = sub_group_size;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
} else {
- kd.gws0 = 64;
- while (kd.gws0 > 16) {
- if (f % kd.gws0 == 0) break;
- kd.gws0 /= 2;
+ dispatchData.gws[0] = 64;
+ while (dispatchData.gws[0] > 16) {
+ if (f % dispatchData.gws[0] == 0)
+ break;
+ dispatchData.gws[0] /= 2;
}
- kd.gws1 = x * y * z;
- kd.gws2 = CeilDiv(b, 16) * (f / kd.gws0) * params.groups;
+ dispatchData.gws[1] = x * y * z;
+ dispatchData.gws[2] = CeilDiv(b, 16) * (f / dispatchData.gws[0]) * params.groups;
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
} else {
size_t x_block_size = 16;
}
x_block_size = std::max(x_block_size, (size_t)8);
if (params.depthwise_separable_opt) {
- kd.gws0 = CeilDiv(x, x_block_size) * y * z;
- kd.gws1 = f;
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x, x_block_size) * y * z;
+ dispatchData.gws[1] = f;
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = sub_group_size;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
} else {
- kd.gws0 = 64;
- while (kd.gws0 > 16) {
- if (f % kd.gws0 == 0) break;
- kd.gws0 /= 2;
+ dispatchData.gws[0] = 64;
+ while (dispatchData.gws[0] > 16) {
+ if (f % dispatchData.gws[0] == 0)
+ break;
+ dispatchData.gws[0] /= 2;
}
- kd.gws1 = CeilDiv(x, x_block_size) * y * z;
- kd.gws2 = b * (f / kd.gws0);
+ dispatchData.gws[1] = CeilDiv(x, x_block_size) * y * z;
+ dispatchData.gws[2] = b * (f / dispatchData.gws[0]);
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
}
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
bool DeconvolutionKernel_b_fs_zyx_fsv16::Validate(const Params& p, const optional_params& o) const {
jit.AddConstant(MakeJitConstant("IW_FULL", params.output.X().LogicalDimPadded()));
- DispatchData runInfo = SetDefault(params);
- jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0));
- jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1));
- jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2));
+ DispatchData dispatchData = SetDefault(params);
+ jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0]));
+ jit.AddConstant(MakeJitConstant("LWS_1", dispatchData.lws[1]));
+ jit.AddConstant(MakeJitConstant("LWS_2", dispatchData.lws[2]));
if (!params.fused_ops.empty()) {
auto fused_dt = GetActivationType(params);
}
DeconvolutionKernelBase::DispatchData DeconvolutionKernel_b_fs_zyx_fsv16_dw::SetDefault(const deconvolution_params& params) const {
- DispatchData kd = DeconvolutionKernelBase::SetDefault(params);
+ DispatchData dispatchData = DeconvolutionKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = CeilDiv(x, GetDispatchParams(params).block_size_x) * y * z;
- kd.gws1 = Align(f, feature_block_size);
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x, GetDispatchParams(params).block_size_x) * y * z;
+ dispatchData.gws[1] = Align(f, feature_block_size);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = sub_group_size;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
bool DeconvolutionKernel_b_fs_zyx_fsv16_dw::Validate(const Params& p, const optional_params& o) const {
auto batch_size = params.output.Batch().v;
auto output_features = params.output.Feature().v;
- DispatchData kd;
+ DispatchData dispatchData;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
size_t gws0 = output_features * batch_size;
size_t lws0 = std::min(gws0, static_cast<size_t>(32));
while (gws0 % lws0) {
lws0--;
}
- kd.gws0 = gws0;
- kd.gws1 = params.output.X().v;
- kd.gws2 = params.output.Y().v * params.output.Z().v;
- kd.lws0 = lws0;
- kd.lws1 = 1;
- kd.lws2 = 1;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- return kd;
+
+ dispatchData.gws[0] = gws0;
+ dispatchData.gws[1] = params.output.X().v;
+ dispatchData.gws[2] = params.output.Y().v * params.output.Z().v;
+
+ dispatchData.lws[0] = lws0;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
+
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ return dispatchData;
}
KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const optional_params& options) const {
}
const deconvolution_params& orgParams = static_cast<const deconvolution_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<deconvolution_params>(params);
deconvolution_params& newParams = *static_cast<deconvolution_params*>(kd.params.get());
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
GetFusedPrimitiveInputsCount(params));
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
return {kd};
}
}
CommonDispatchData DeconvolutionKernel_bfyx_opt::SetDefault(const deconvolution_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
auto wg_size = 16;
- kd.gws0 = Align(params.output.X().v, wg_size * params.stride.x);
- kd.gws1 = params.output.Y().v;
- kd.gws2 = params.output.Batch().v * params.output.Feature().v;
- kd.lws0 = wg_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
- kd.efficiency = FORCE_PRIORITY_6;
- return kd;
+ dispatchData.gws[0] = Align(params.output.X().v, wg_size * params.stride.x);
+ dispatchData.gws[1] = params.output.Y().v;
+ dispatchData.gws[2] = params.output.Batch().v * params.output.Feature().v;
+
+ dispatchData.lws[0] = wg_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
+
+ dispatchData.efficiency = FORCE_PRIORITY_6;
+ return dispatchData;
}
JitConstants DeconvolutionKernel_bfyx_opt::GetJitConstants(const deconvolution_params& params) const {
}
DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_along_f_tile_bfx::SetDefault(const deconvolution_params& params) const {
- auto dispatch = Parent::SetDefault(params);
+ DispatchData dispatchData = Parent::SetDefault(params);
auto tile_x = GetTileX(params);
auto tile_ofm = GetTileOFM(params);
auto tile_b = GetTileB(params);
- std::vector<size_t> global = {
+ dispatchData.gws = {
CeilDiv(params.output.X().v, tile_x) * params.output.Y().v * params.output.Z().v,
Align(CeilDiv(params.output.Feature().v, tile_ofm), simd),
CeilDiv(params.output.Batch().v, tile_b)
};
- std::vector<size_t> local = { 1, simd, 1 };
-
- dispatch.gws0 = global[0];
- dispatch.gws1 = global[1];
- dispatch.gws2 = global[2];
-
- dispatch.lws0 = local[0];
- dispatch.lws1 = local[1];
- dispatch.lws2 = local[2];
+ dispatchData.lws = { 1, simd, 1 };
// Currently most optimized for fsv16 formats
if (params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 || params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16) {
- dispatch.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
} else {
- dispatch.efficiency = FORCE_PRIORITY_8;
+ dispatchData.efficiency = FORCE_PRIORITY_8;
}
- return dispatch;
+ return dispatchData;
}
JitConstants DeconvolutionKernel_imad_along_f_tile_bfx::GetJitConstants(const deconvolution_params& params) const {
}
DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_ref::SetDefault(const deconvolution_params& params) const {
- auto dispatch = Parent::SetDefault(params);
+ DispatchData dispatchData = Parent::SetDefault(params);
- std::vector<size_t> global = {
+ dispatchData.gws = {
params.output.Feature().v,
params.output.X().v * params.output.Y().v * params.output.Z().v,
params.output.Batch().v
};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- dispatch.gws0 = global[0];
- dispatch.gws1 = global[1];
- dispatch.gws2 = global[2];
+ dispatchData.efficiency = FORCE_PRIORITY_9;
- dispatch.lws0 = local[0];
- dispatch.lws1 = local[1];
- dispatch.lws2 = local[2];
-
- dispatch.efficiency = FORCE_PRIORITY_9;
-
- return dispatch;
+ return dispatchData;
}
JitConstants DeconvolutionKernel_imad_ref::GetJitConstants(const deconvolution_params& params) const {
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData DeconvolutionKernelRef::SetDefault(const deconvolution_params& params) const {
- CommonDispatchData runInfo = DeconvolutionKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = DeconvolutionKernelBase::SetDefault(params);
if (params.output.Feature().v * params.output.Batch().v <= 16) {
const auto& out = params.output;
- runInfo.gws0 = Align(out.X().v, 32);
- runInfo.gws1 = out.Y().v * out.Z().v;
- runInfo.gws2 = out.Feature().v * out.Batch().v;
+ dispatchData.gws[0] = Align(out.X().v, 32);
+ dispatchData.gws[1] = out.Y().v * out.Z().v;
+ dispatchData.gws[2] = out.Feature().v * out.Batch().v;
- runInfo.lws0 = 32;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
- return runInfo;
+ return dispatchData;
}
JitConstants DeconvolutionKernelRef::GetJitConstants(const deconvolution_params& params) const {
}
CommonDispatchData DepthToSpaceKernelBase::SetDefault(const depth_to_space_params& params) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = { params.output.Batch().v,
- params.output.Feature().v,
- params.output.Z().v * params.output.Y().v * params.output.X().v };
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Z().v * params.output.Y().v * params.output.X().v };
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const {
return {};
}
- auto runInfo = SetDefault(newParams);
+ auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
kd.estimatedTime = estimatedTime;
}
CommonDispatchData DepthToSpaceKernelBlock2Opt::SetDefault(const depth_to_space_params& params) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = { Align(params.inputs[0].X().v / 2, 16),
- params.inputs[0].Y().v,
- 1};
+ dispatchData.gws = { Align(params.inputs[0].X().v / 2, 16),
+ params.inputs[0].Y().v,
+ 1 };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants DepthToSpaceKernelBlock2Opt::GetJitConstants(const depth_to_space_params& params) const {
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return jit;
}
-DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault(
- const detection_output_params& params) const {
- DispatchData kd;
+DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault(const detection_output_params& /*params*/) const {
+ DispatchData dispatchData;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
- kd.gws0 = 0;
- kd.gws1 = 0;
- kd.gws2 = 0;
- kd.lws0 = 0;
- kd.lws1 = 0;
- kd.lws2 = 0;
- return kd;
+ dispatchData.gws[0] = 0;
+ dispatchData.gws[1] = 0;
+ dispatchData.gws[2] = 0;
+
+ dispatchData.lws[0] = 0;
+ dispatchData.lws[1] = 0;
+ dispatchData.lws[2] = 0;
+
+ return dispatchData;
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData DetectionOutputKernel::SetDefault(const detection_output_params& params) const {
- CommonDispatchData runInfo = DetectionOutputKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params);
// Number of all work items is set to total number of bounding boxes -
// one bounding box is procerssed by one work item
bboxesNum = work_group_size * params.inputs[0].Batch().v;
- runInfo.gws0 = Align(bboxesNum, work_group_size);
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = Align(bboxesNum, work_group_size);
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = work_group_size;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = work_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
KernelsData DetectionOutputKernel::GetKernelsData(const Params& params, const optional_params& options) const {
KernelData kd = KernelData::Default<detection_output_params>(params);
const detection_output_params& detectOutParams = static_cast<const detection_output_params&>(params);
- DispatchData runInfo = SetDefault(detectOutParams);
+ DispatchData dispatchData = SetDefault(detectOutParams);
auto cldnnJit = GetJitConstants(detectOutParams);
auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint);
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2});
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData DetectionOutputKernel_sort::SetDefault(const detection_output_params& params) const {
- CommonDispatchData runInfo = DetectionOutputKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params);
unsigned class_num = params.detectOutParams.num_classes;
if (params.detectOutParams.share_location && params.detectOutParams.background_label_id == 0) {
work_group_size = (work_group_size + work_group_size % 2) / (work_group_size / 256 + 1);
}
- runInfo.gws0 = Align(bboxesNum, work_group_size);
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = Align(bboxesNum, work_group_size);
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = work_group_size;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = work_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
KernelsData DetectionOutputKernel_sort::GetKernelsData(const Params& params, const optional_params& options) const {
KernelData kd = KernelData::Default<detection_output_params>(params);
const detection_output_params& detectOutParams = static_cast<const detection_output_params&>(params);
- DispatchData runInfo = SetDefault(detectOutParams);
+ DispatchData dispatchData = SetDefault(detectOutParams);
auto cldnnJit = GetJitConstants(detectOutParams);
auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint);
kd.estimatedTime = FORCE_PRIORITY_8;
}
EltwiseKernelBase::DispatchData EltwiseKernel_b_fs_yx_fsv16::SetDefault(const eltwise_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
- kd.gws0 = Align(params.output.Feature().v, 16);
- kd.gws1 = CeilDiv(params.output.X().v, GetBlockSize(params)) * params.output.Y().v;
- kd.gws2 = params.output.Batch().v;
+ dispatchData.gws[0] = Align(params.output.Feature().v, 16);
+ dispatchData.gws[1] = CeilDiv(params.output.X().v, GetBlockSize(params)) * params.output.Y().v;
+ dispatchData.gws[2] = params.output.Batch().v;
- kd.lws0 = 16;
- kd.lws1 = 16;
- while (kd.lws1 > 1) {
- if (kd.gws1 % kd.lws1 == 0)
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 16;
+ while (dispatchData.lws[1] > 1) {
+ if (dispatchData.gws[1] % dispatchData.lws[1] == 0)
break;
- kd.lws1--;
+ dispatchData.lws[1]--;
}
- kd.lws2 = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_1;
- return kd;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
+ return dispatchData;
}
KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, const optional_params& options) const {
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
- DispatchData runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto& kernel = kd.kernels[0];
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
false,
GetFusedPrimitiveInputsCount(params));
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
return {kd};
}
}
EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
if (params.layoutBased || params.int8_quantization || params.broadcast) {
- auto global = GetTensorFriendlyWorkGroups(params.output);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.gws = GetTensorFriendlyWorkGroups(params.output);
} else if (CheckInputsOutputNoPitchSameDims(params)) {
- kd.gws0 = params.output.LogicalSize();
- kd.gws1 = 1;
- kd.gws2 = 1;
+ dispatchData.gws[0] = params.output.LogicalSize();
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
} else {
const auto& out = params.output;
gws.push_back(1U);
}
- kd.gws0 = gws[0];
+ dispatchData.gws[0] = gws[0];
if (n_dims == 6) {
- kd.gws1 = gws[1] * gws[2] * gws[3]; // y*z*w
- kd.gws2 = gws[4] * gws[5];
+ dispatchData.gws[1] = gws[1] * gws[2] * gws[3]; // y*z*w
+ dispatchData.gws[2] = gws[4] * gws[5];
} else if (n_dims == 5) {
- kd.gws1 = gws[1] * gws[2]; // y*z
- kd.gws2 = gws[3] * gws[4];
+ dispatchData.gws[1] = gws[1] * gws[2]; // y*z
+ dispatchData.gws[2] = gws[3] * gws[4];
} else {
- kd.gws1 = gws[1];
- kd.gws2 = gws[2] * gws[3];
+ dispatchData.gws[1] = gws[1];
+ dispatchData.gws[2] = gws[2] * gws[3];
}
}
- auto local = GetOptimalLocalWorkGroupSizes({kd.gws0, kd.gws1, kd.gws2}, params.engineInfo);
+ auto local = GetOptimalLocalWorkGroupSizes({dispatchData.gws[0], dispatchData.gws[1], dispatchData.gws[2]}, params.engineInfo);
const size_t optimal_lws_values[] = {256, 224, 192, 160, 128, 96, 64, 32, 16};
if ((params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 ||
params.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 ||
params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16) &&
- params.output.Feature().v % 16 == 0 && kd.gws1 % 16 == 0) {
- kd.lws0 = 1;
+ params.output.Feature().v % 16 == 0 && dispatchData.gws[1] % 16 == 0) {
+ dispatchData.lws[0] = 1;
for (auto lws : optimal_lws_values) {
- if (kd.gws1 % lws == 0) {
- kd.lws1 = lws;
+ if (dispatchData.gws[1] % lws == 0) {
+ dispatchData.lws[1] = lws;
break;
}
}
- kd.lws2 = 1;
+ dispatchData.lws[2] = 1;
} else if (params.output.GetLayout() == DataLayout::fs_b_yx_fsv32) {
- kd.gws2 = Align(kd.gws2, 32);
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 32;
+ dispatchData.gws[2] = Align(dispatchData.gws[2], 32);
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 32;
} else if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv32 && params.output.Feature().v % 32 == 0) {
if (params.layoutBased || params.int8_quantization || params.broadcast) {
- kd.lws0 = 1;
- kd.lws1 = 32;
- kd.lws2 = 1;
- } else if (kd.gws0 == params.output.LogicalSize()) {
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 32;
+ dispatchData.lws[2] = 1;
+ } else if (dispatchData.gws[0] == params.output.LogicalSize()) {
+ dispatchData.lws = local;
} else {
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 32;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 32;
}
} else {
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.lws[0] = local[0];
+ dispatchData.lws[1] = local[1];
+ dispatchData.lws[2] = local[2];
}
- return kd;
+ return dispatchData;
}
KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
- DispatchData runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto& kernel = kd.kernels[0];
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(),
namespace kernel_selector {
-namespace {
-std::shared_ptr<JitConstant> GetJit_GetIndexForDataLayout(std::string jitName,
- std::string prefix,
- DataLayout dataLayout) {
- std::string jitValue;
- switch (dataLayout) {
- case DataLayout::byxf:
- jitValue += "GET_DATA_INDEX(";
- break;
- case DataLayout::fs_b_yx_fsv32:
- jitValue += "GET_DATA_FS_B_YX_FSV32_INDEX(";
- break;
- default:
- throw std::runtime_error("incorrect data_layout");
- }
- jitValue += prefix + ",b,f,y,x)";
-
- return MakeJitConstant(jitName, jitValue);
-}
-} // namespace
// TODO: [blocked_formats] does fp32 work well with kernel?
ParamsKey EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32::GetSupportedKey() const {
ParamsKey k;
}
CommonDispatchData EmbeddingBagKernelRef::SetDefault(const embedding_bag_params& params) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = { params.output.Batch().v,
- params.output.Feature().v,
- params.output.Y().v * params.output.X().v };
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Y().v * params.output.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return {};
}
- auto runInfo = SetDefault(newParams);
+ auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
}
ExtractImagePatchesKernelBase::DispatchData ExtractImagePatchesKernelBase::SetDefault(const extract_image_patches_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
- std::vector<size_t> global = { params.output.Batch().v,
- params.output.Feature().v,
- params.output.Y().v * params.output.X().v };
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Y().v * params.output.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& params,
const auto& prim_params = static_cast<const extract_image_patches_params&>(params);
- auto run_info = SetDefault(prim_params);
+ auto dispatchData = SetDefault(prim_params);
KernelData kd = KernelData::Default<extract_image_patches_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimated_time;
virtual ~FullyConnectedBlockKernelBase() {}
protected:
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
// how many batches will a single work item compute
virtual size_t GetBatchesPerWorkItem(const fully_connected_params& params) const;
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params,
int) const {
DispatchData dispatchData;
- dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
// Determine global work sizes.
- dispatchData.gws0 = params.output.LogicalSize();
- dispatchData.gws1 = dispatchData.gws2 = 1;
+ dispatchData.gws = { params.output.LogicalSize(), 1, 1 };
// Find largest positive local work size that is divider for global work size.
- dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (dispatchData.gws0 % dispatchData.lws0 != 0) {
- --dispatchData.lws0;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- dispatchData.lws1 = dispatchData.lws2 = 1;
+ dispatchData.lws[1] = dispatchData.lws[2] = 1;
return dispatchData;
}
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
- const DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
- auto cldnn_jit = GetJitConstants(newParams, runInfo);
+ const DispatchData dispatchData = SetDefault(newParams, autoTuneIndex);
+ auto cldnn_jit = GetJitConstants(newParams, dispatchData);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
uint32_t fused_deps_total = 0;
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
const int autoTuneIndex = -1) const;
protected:
- virtual JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const;
virtual DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const;
KernelsData GetCommonKernelsData(const Params ¶ms,
const optional_params &options,
int autoTuneIndex = -1) const;
// Fused ops
- virtual JitConstants GetFusedPrimitivesJitConstants(const fully_connected_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetFusedPrimitivesJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const;
Datatype GetActivationType(const fully_connected_params& params) const;
// --Fused ops
FullyConnected_bf_io_GEMM::DispatchData FullyConnected_bf_io_GEMM::SetDefault(const fully_connected_params& params,
int autoTuneIndex) const {
- auto runInfo = Parent::SetDefault(params, autoTuneIndex);
+ auto dispatchData = Parent::SetDefault(params, autoTuneIndex);
const uint32_t localWorkSizeX = 64;
const uint32_t globalWorkSizeX = localWorkSizeX;
- std::vector<size_t> global = {globalWorkSizeX, params.output.Feature().v, params.output.Batch().v};
- std::vector<size_t> local = {localWorkSizeX, 1, 1};
+ dispatchData.gws = { globalWorkSizeX, params.output.Feature().v, 1 };
+ dispatchData.lws = { localWorkSizeX, 1, 1 };
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = 1;
+ dispatchData.efficiency = FORCE_PRIORITY_6;
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = 1;
-
- runInfo.efficiency = FORCE_PRIORITY_6;
-
- return runInfo;
+ return dispatchData;
}
JitConstants FullyConnected_bf_io_GEMM::GetJitConstants(const fully_connected_params& params,
- const DispatchData& kd) const {
- auto jit = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
if (params.inputs[0].GetDType() == Datatype::F16) {
jit.AddConstant(MakeJitConstant("__fc_f16", ""));
protected:
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
FullyConnected_bf_io_input_spatial::DispatchData FullyConnected_bf_io_input_spatial::SetDefault(
const fully_connected_params& arg,
int) const {
- auto kd = FullyConnectedKernelBase::SetDefault(arg);
+ auto dispatchData = FullyConnectedKernelBase::SetDefault(arg);
- kd.gws0 = Align(arg.output.LogicalSize() / arg.inputs[0].Batch().v, 16);
- kd.gws1 = arg.inputs[0].Batch().v;
- kd.gws2 = 1;
- kd.lws0 = 16;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.gws[0] = Align(arg.output.LogicalSize() / arg.inputs[0].Batch().v, 16);
+ dispatchData.gws[1] = arg.inputs[0].Batch().v;
+ dispatchData.gws[2] = 1;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
+
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
const auto& input = arg.inputs[0];
const auto& output = arg.output;
if (input.Batch().v == 1 && output.Batch().v == 1) {
if ((input.LogicalSize() / output.Batch().v >= 4096) && (output.Feature().v >= 4096)) {
- kd.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
}
}
- return kd;
+ return dispatchData;
}
bool FullyConnected_bf_io_input_spatial::Validate(const Params& p, const optional_params& o) const {
return k;
}
-JitConstants FullyConnected_bf_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants FullyConnected_bf_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetUnitType(params);
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
.Case(tune_params(16, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, AGE_BASED))
.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, AGE_BASED));
}
-
+
if (dtype == Datatype::F32) {
// tune_params(tile_b, tile_ofm, tile_ifm, tile_k, dispatch_bsv, dispatch_fsv, exec_options)
selector.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 1, 16, 2, AGE_BASED))
selector.Case([&](const fully_connected_params&) -> tune_params {
tune_params result(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, DEFAULT);
-
+
while (batch % result.tile_b != 0)
result.tile_b--;
-
+
result.dispatch_bsv = 16;
while (batch % (result.tile_b * result.dispatch_bsv) != 0)
result.dispatch_bsv--;
if (result.tile_b >= 8)
result.exec_options = AGE_BASED;
-
+
return result;
});
FullyConnected_bf_tiled::DispatchData
FullyConnected_bf_tiled::SetDefault(const fully_connected_params& params, int autoTuneIndex) const {
- auto runInfo = Parent::SetDefault(params);
+ auto dispatchData = Parent::SetDefault(params);
auto tparams = GetAutoTuneParams(params, autoTuneIndex);
size_t feature_threads = CeilDiv(params.output.Feature().v, tparams.tile_ofm * simd);
size_t batch_threads = params.output.Batch().v / tparams.tile_b;
- runInfo.gws0 = feature_threads * batch_threads * simd;
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = feature_threads * batch_threads * simd;
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = simd;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = simd;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- runInfo.tile_m = tparams.tile_b;
- runInfo.tile_n = tparams.tile_ofm;
- runInfo.tile_mk = tparams.tile_ifm;
- runInfo.tile_nk = tparams.tile_k;
- runInfo.tile_ms = tparams.dispatch_bsv;
- runInfo.tile_ns = tparams.dispatch_fsv;
+ dispatchData.tile_m = tparams.tile_b;
+ dispatchData.tile_n = tparams.tile_ofm;
+ dispatchData.tile_mk = tparams.tile_ifm;
+ dispatchData.tile_nk = tparams.tile_k;
+ dispatchData.tile_ms = tparams.dispatch_bsv;
+ dispatchData.tile_ns = tparams.dispatch_fsv;
- return runInfo;
+ return dispatchData;
}
-JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstant(MakeJitConstant("SIMD", simd));
- jit.AddConstant(MakeJitConstant("TILE_B", kd.tile_m));
- jit.AddConstant(MakeJitConstant("TILE_OFM", kd.tile_n));
- jit.AddConstant(MakeJitConstant("TILE_IFM", kd.tile_mk));
- jit.AddConstant(MakeJitConstant("TILE_K", kd.tile_nk));
- jit.AddConstant(MakeJitConstant("TILE_K_OFM", kd.tile_nk * kd.tile_n));
- jit.AddConstant(MakeJitConstant("DISPATCH_BSV", kd.tile_ms));
- jit.AddConstant(MakeJitConstant("DISPATCH_FSV", kd.tile_ns));
+ jit.AddConstant(MakeJitConstant("TILE_B", dispatchData.tile_m));
+ jit.AddConstant(MakeJitConstant("TILE_OFM", dispatchData.tile_n));
+ jit.AddConstant(MakeJitConstant("TILE_IFM", dispatchData.tile_mk));
+ jit.AddConstant(MakeJitConstant("TILE_K", dispatchData.tile_nk));
+ jit.AddConstant(MakeJitConstant("TILE_K_OFM", dispatchData.tile_nk * dispatchData.tile_n));
+ jit.AddConstant(MakeJitConstant("DISPATCH_BSV", dispatchData.tile_ms));
+ jit.AddConstant(MakeJitConstant("DISPATCH_FSV", dispatchData.tile_ns));
- jit.Merge(MakeConstantLoopUnrollJitConstants(kd.tile_m));
+ jit.Merge(MakeConstantLoopUnrollJitConstants(dispatchData.tile_m));
bool realign_fp16_offset = params.inputs[0].GetDType() == Datatype::F16 && params.output.GetFirstElementOffset() % 2 != 0;
jit.AddConstant(MakeJitConstant("REALIGN_FP16_OFFSET", realign_fp16_offset));
if (!params.fused_ops.empty()) {
auto boundary_check = BoundaryCheck::DISABLED;
- if (params.output.Feature().v % (kd.tile_n * simd) != 0)
+ if (params.output.Feature().v % (dispatchData.tile_n * simd) != 0)
boundary_check = BoundaryCheck::ENABLED;
FusedOpsConfiguration conf = { "",
{"(out_b + bi)", "out_f", "0", "0"},
"activated[bi]",
activation_dt,
- kd.tile_n,
+ dispatchData.tile_n,
LoadType::LT_ALIGNED_READ,
boundary_check,
IndexType::TENSOR_COORD,
FusedOpType::SCALE,
FusedOpType::QUANTIZE };
}
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& params, const optional_params& options) const override;
tune_params GetAutoTuneParams(const fully_connected_params& params, int idx = -1) const;
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnected_bfyx_Ref::DispatchData FullyConnected_bfyx_Ref::SetDefault(const fully_connected_params& params,
int) const {
- auto runInfo = Parent::SetDefault(params);
+ auto dispatchData = Parent::SetDefault(params);
- std::vector<size_t> global = {params.output.Feature().v, params.output.Batch().v};
- std::vector<size_t> local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { params.output.Feature().v, params.output.Batch().v, 1 };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = 1;
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = 1;
-
- return runInfo;
+ return dispatchData;
}
JitConstants FullyConnected_bfyx_Ref::GetJitConstants(const fully_connected_params& params,
- const FullyConnectedKernelBase::DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+ const FullyConnectedKernelBase::DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
Datatype accumulator_dt;
Datatype activation_dt;
FusedOpType::ACTIVATION };
}
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnected_bs_f_bsv16_af8::DispatchData FullyConnected_bs_f_bsv16_af8::SetDefault(const fully_connected_params& arg,
int) const {
- auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
+ auto dispatchData = FullyConnectedBlockKernelBase::SetDefault(arg);
size_t groups_per_batches = GetLocalGroupsSize(arg);
- kd.gws0 = Align(arg.output.LogicalSize() / (GetBatchesPerWorkItem(arg) * groups_per_batches), 16);
- kd.gws1 = groups_per_batches;
- kd.lws0 = 16;
- kd.lws1 = 1;
+ dispatchData.gws[0] = Align(arg.output.LogicalSize() / (GetBatchesPerWorkItem(arg) * groups_per_batches), 16);
+ dispatchData.gws[1] = groups_per_batches;
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 1;
- return kd;
+ return dispatchData;
}
static bool check_input_layout(const DataTensor& t) {
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
JitConstants FullyConnected_bs_f_bsv16_b1::GetJitConstants(
const fully_connected_params& params,
- const FullyConnectedKernelBase::DispatchData& run_info) const {
- auto& d = static_cast<const DispatchData&>(run_info);
- auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, run_info);
+ const FullyConnectedKernelBase::DispatchData& dispatchData) const {
+ auto& d = static_cast<const DispatchData&>(dispatchData);
+ auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, dispatchData);
cldnn_jit.AddConstants({
- MakeJitConstant("SUB_GROUP_SIZE", run_info.lws0),
- MakeJitConstant("WORK_ITEMS_PER_BATCH", run_info.gws1),
+ MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]),
+ MakeJitConstant("WORK_ITEMS_PER_BATCH", dispatchData.gws[1]),
MakeJitConstant("UNIT_BYTE_SIZE", d.unit_byte_size),
MakeJitConstant("CHUNK_TYPE", d.chunk_type),
FullyConnected_bs_f_bsv16_b1::DispatchData FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg,
int) const {
- DispatchData run_info = FullyConnectedKernelBase::SetDefault(arg);
+ DispatchData dispatchData = FullyConnectedKernelBase::SetDefault(arg);
// Properties of chunk and unit.
const char* chunk_type = "uint";
- const uint32_t unit_byte_size = run_info.fp16UnitUsed ? sizeof(short) : sizeof(float);
+ const uint32_t unit_byte_size = BytesPerElement(arg.inputs[0].GetDType());
constexpr uint32_t chunk_byte_size = sizeof(uint32_t);
constexpr uint32_t sub_group_size = 16;
const uint32_t units_per_chunk = chunk_byte_size / unit_byte_size;
const auto response_size = arg.output.Feature().v;
auto rg_count = CeilDiv(response_size, responses_per_sg_exec);
- run_info.lws0 = sub_group_size;
+ dispatchData.lws[0] = sub_group_size;
// Number of work items needed to process all response groups.
- run_info.gws0 = rg_count * sub_group_size;
- run_info.lws1 = run_info.lws2 = 1;
- run_info.gws1 = run_info.gws2 = 1;
+ dispatchData.gws[0] = rg_count * sub_group_size;
+ dispatchData.lws[1] = dispatchData.lws[2] = 1;
+ dispatchData.gws[1] = dispatchData.gws[2] = 1;
- run_info.unit_byte_size = unit_byte_size;
- run_info.chunk_type = chunk_type;
- run_info.chunk_byte_size = chunk_byte_size;
- run_info.units_per_chunk = units_per_chunk;
- run_info.bytes_per_sg_read = sub_group_size * chunk_byte_size;
- run_info.units_per_sg_read = units_per_sg_read;
- run_info.responses_per_sg_exec = responses_per_sg_exec;
- run_info.in_chunk_prefetch_size = 2;
- run_info.filter_chunk_prefetch_size = responses_per_sg_exec;
+ dispatchData.unit_byte_size = unit_byte_size;
+ dispatchData.chunk_type = chunk_type;
+ dispatchData.chunk_byte_size = chunk_byte_size;
+ dispatchData.units_per_chunk = units_per_chunk;
+ dispatchData.bytes_per_sg_read = sub_group_size * chunk_byte_size;
+ dispatchData.units_per_sg_read = units_per_sg_read;
+ dispatchData.responses_per_sg_exec = responses_per_sg_exec;
+ dispatchData.in_chunk_prefetch_size = 2;
+ dispatchData.filter_chunk_prefetch_size = responses_per_sg_exec;
- return run_info;
+ return dispatchData;
}
KernelsData FullyConnected_bs_f_bsv16_b1::GetKernelsData(const Params& params, const optional_params& optParams) const {
protected:
JitConstants GetJitConstants(const fully_connected_params& params,
- const FullyConnectedKernelBase::DispatchData& kd) const override;
+ const FullyConnectedKernelBase::DispatchData& dispatchData) const override;
DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnected_bs_f_bsv8_af8::DispatchData FullyConnected_bs_f_bsv8_af8::SetDefault(const fully_connected_params& arg,
int) const {
- auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
+ auto dispatchData = FullyConnectedBlockKernelBase::SetDefault(arg);
size_t groups_per_batches = GetLocalGroupsSize(arg);
- kd.gws0 =
+ dispatchData.gws[0] =
Align(arg.output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches),
8);
- kd.gws1 = groups_per_batches;
- kd.lws0 = 8;
- kd.lws1 = 1;
+ dispatchData.gws[1] = groups_per_batches;
+ dispatchData.lws[0] = 8;
+ dispatchData.lws[1] = 1;
- return kd;
+ return dispatchData;
}
static bool check_input_layout(const DataTensor& t) {
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnected_fb_io_b8_f8::DispatchData FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg,
int) const {
- auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
+ auto dispatchData = FullyConnectedBlockKernelBase::SetDefault(arg);
const auto& output = arg.output;
size_t groups_per_batches = GetLocalGroupsSize(arg);
- kd.gws0 =
+ dispatchData.gws[0] =
Align(output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8);
- kd.gws1 = groups_per_batches;
- kd.lws0 = 8;
- kd.lws1 = 1;
+ dispatchData.gws[1] = groups_per_batches;
+ dispatchData.lws[0] = 8;
+ dispatchData.lws[1] = 1;
- return kd;
+ return dispatchData;
}
bool FullyConnected_fb_io_b8_f8::Validate(const Params& p, const optional_params& o) const {
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnected_fb_io_block::DispatchData FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg,
int) const {
- auto kd = FullyConnectedKernelBase::SetDefault(arg);
+ auto dispatchData = FullyConnectedKernelBase::SetDefault(arg);
const auto& output = arg.output;
auto batch_size = output.Batch().v;
// for at least one input data set from batch.
auto rg_count = CeilDiv(response_size, units_per_sg_read);
- kd.lws0 = sub_group_size;
+ dispatchData.lws[0] = sub_group_size;
// Number of work items needed to process all response groups.
- kd.gws0 = rg_count * sub_group_size;
- kd.lws1 = 1;
- kd.gws1 = batch_size / units_per_sg_read;
-
- kd.unit_byte_size = unit_byte_size;
- kd.chunk_type = chunk_type;
- kd.chunk_byte_size = chunk_byte_size;
- kd.units_per_chunk = units_per_chunk;
- kd.bytes_per_sg_read = sub_group_size * chunk_byte_size;
- kd.units_per_sg_read = units_per_sg_read;
- kd.rg_count = (uint32_t)rg_count;
- kd.last_rg_size = response_size % units_per_sg_read;
- return kd;
+ dispatchData.gws[0] = rg_count * sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.gws[1] = batch_size / units_per_sg_read;
+
+ dispatchData.unit_byte_size = unit_byte_size;
+ dispatchData.chunk_type = chunk_type;
+ dispatchData.chunk_byte_size = chunk_byte_size;
+ dispatchData.units_per_chunk = units_per_chunk;
+ dispatchData.bytes_per_sg_read = sub_group_size * chunk_byte_size;
+ dispatchData.units_per_sg_read = units_per_sg_read;
+ dispatchData.rg_count = (uint32_t)rg_count;
+ dispatchData.last_rg_size = response_size % units_per_sg_read;
+ return dispatchData;
}
JitConstants FullyConnected_fb_io_block::GetJitConstants(const fully_connected_params& params,
- const FullyConnectedKernelBase::DispatchData& run_info) const {
- auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, run_info);
+ const FullyConnectedKernelBase::DispatchData& dispatchData) const {
+ auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, dispatchData);
cldnn_jit.AddConstants({
- MakeJitConstant("SUB_GROUP_SIZE", run_info.lws0),
- MakeJitConstant("WORK_ITEMS_PER_BATCH", run_info.gws1),
- MakeJitConstant("UNIT_BYTE_SIZE", run_info.unit_byte_size),
- MakeJitConstant("CHUNK_TYPE", run_info.chunk_type),
- MakeJitConstant("CHUNK_BYTE_SIZE", run_info.chunk_byte_size),
- MakeJitConstant("UNITS_PER_CHUNK", run_info.units_per_chunk),
- MakeJitConstant("BYTES_PER_SG_READ", run_info.bytes_per_sg_read),
- MakeJitConstant("UNITS_PER_SG_READ", run_info.units_per_sg_read),
- MakeJitConstant("RG_COUNT", run_info.rg_count),
- MakeJitConstant("LAST_RG_SIZE", run_info.last_rg_size),
+ MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]),
+ MakeJitConstant("WORK_ITEMS_PER_BATCH", dispatchData.gws[1]),
+ MakeJitConstant("UNIT_BYTE_SIZE", dispatchData.unit_byte_size),
+ MakeJitConstant("CHUNK_TYPE", dispatchData.chunk_type),
+ MakeJitConstant("CHUNK_BYTE_SIZE", dispatchData.chunk_byte_size),
+ MakeJitConstant("UNITS_PER_CHUNK", dispatchData.units_per_chunk),
+ MakeJitConstant("BYTES_PER_SG_READ", dispatchData.bytes_per_sg_read),
+ MakeJitConstant("UNITS_PER_SG_READ", dispatchData.units_per_sg_read),
+ MakeJitConstant("RG_COUNT", dispatchData.rg_count),
+ MakeJitConstant("LAST_RG_SIZE", dispatchData.last_rg_size),
});
return cldnn_jit;
}
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const fully_connected_params& params,
- const FullyConnectedKernelBase::DispatchData& kd) const override;
+ const FullyConnectedKernelBase::DispatchData& dispatchData) const override;
DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
return k;
}
-JitConstants FullyConnected_fb_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants FullyConnected_fb_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
FullyConnected_fb_oi_b8_ref::DispatchData FullyConnected_fb_oi_b8_ref::SetDefault(const fully_connected_params& arg,
int) const {
- auto kd = FullyConnectedKernelBase::SetDefault(arg);
+ auto dispatchData = FullyConnectedKernelBase::SetDefault(arg);
const auto& output = arg.output;
- kd.gws0 = output.Batch().v;
- kd.gws1 = output.LogicalSize() / kd.gws0;
- kd.lws0 = 8;
- kd.lws1 = 1;
+ dispatchData.gws[0] = output.Batch().v;
+ dispatchData.gws[1] = output.LogicalSize() / dispatchData.gws[0];
+ dispatchData.lws[0] = 8;
+ dispatchData.lws[1] = 1;
- return kd;
+ return dispatchData;
}
bool FullyConnected_fb_oi_b8_ref::Validate(const Params& p, const optional_params& o) const {
}
-JitConstants FullyConnected_fb_oi_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants FullyConnected_fb_oi_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetUnitType(params);
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
FullyConnected_fs_byx_fsv32::Parent::DispatchData FullyConnected_fs_byx_fsv32::SetDefault(
const fully_connected_params& params,
int autoTuneIndex) const {
- auto runInfo = Parent::SetDefault(params, autoTuneIndex);
+ auto dispatchData = Parent::SetDefault(params, autoTuneIndex);
auto blockSizeB = std::min(outputBlockSizeB, params.output.Batch().v);
auto blockNumB = CeilDiv(params.output.Batch().v, blockSizeB);
auto wgHeight = std::min(preferredWGHeight, blockNumB);
- runInfo.gws0 = CeilDiv(params.output.Feature().v, outputBlockSizeF);
- runInfo.gws1 = RoundUp(blockNumB, wgHeight);
- runInfo.gws2 = subGroupSize;
+ dispatchData.gws[0] = CeilDiv(params.output.Feature().v, outputBlockSizeF);
+ dispatchData.gws[1] = RoundUp(blockNumB, wgHeight);
+ dispatchData.gws[2] = subGroupSize;
- runInfo.lws0 = 1;
- runInfo.lws1 = wgHeight;
- runInfo.lws2 = subGroupSize;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = wgHeight;
+ dispatchData.lws[2] = subGroupSize;
- runInfo.efficiency = FORCE_PRIORITY_5;
+ dispatchData.efficiency = FORCE_PRIORITY_5;
- return runInfo;
+ return dispatchData;
}
JitConstants FullyConnected_fs_byx_fsv32::GetJitConstants(const fully_connected_params& params,
- const DispatchData& kd) const {
- auto jit = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
auto blockSizeB = std::min(outputBlockSizeB, params.output.Batch().v);
auto blockNumB = CeilDiv(params.output.Batch().v, blockSizeB);
protected:
ParamsKey GetSupportedKey() const override;
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
int) const {
const int simdSize = 16;
- auto runInfo = Parent::SetDefault(params);
+ auto dispatchData = Parent::SetDefault(params);
- runInfo.gws0 = RoundUp(params.output.Feature().v, simdSize);
- runInfo.gws1 = params.output.Batch().v;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = RoundUp(params.output.Feature().v, simdSize);
+ dispatchData.gws[1] = params.output.Batch().v;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = simdSize;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = simdSize;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
} // SetDefault
bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_params& options) const {
return true;
} // Validate
-JitConstants FullyConnectedKernelIMAD::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const {
- auto jit = Parent::GetJitConstants(params, kd);
+JitConstants FullyConnectedKernelIMAD::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
protected:
bool Validate(const Params& params, const optional_params& options) const override;
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
FusedOpType::SCALE,
FullyConnectedKernelMMAD::DispatchData FullyConnectedKernelMMAD::SetDefault(const fully_connected_params& params,
int) const {
FullyConnectedTuningData tuning_data = SetTuningParams(params);
- auto runInfo = Parent::SetDefault(params);
+ auto dispatchData = Parent::SetDefault(params);
const auto& output = params.output;
- std::vector<size_t> global = { Align(output.Feature().v, tuning_data.sub_group_size) * tuning_data.slm_div_factor, output.Batch().v, 1 };
- std::vector<size_t> local = { tuning_data.work_group_size, 1, 1 };
+ dispatchData.gws = { Align(output.Feature().v, tuning_data.sub_group_size) * tuning_data.slm_div_factor, output.Batch().v, 1 };
+ dispatchData.lws = { tuning_data.work_group_size, 1, 1 };
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants FullyConnectedKernelMMAD::GetJitConstants(const fully_connected_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
FullyConnectedTuningData tuning_data = SetTuningParams(params);
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
auto& input = params.inputs[0];
auto& weights = params.weights;
};
protected:
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
return k;
}
-JitConstants FullyConnected_yxfb_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants FullyConnected_yxfb_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = GetUnitType(params);
FusedOpsConfiguration conf = { "", {"b", "f", "y", "x"}, "result", input_dt, 1 };
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
}
JitConstants fused_conv_eltwise_kernel_base::GetJitConstants(const fused_conv_eltwise_params& params,
- const DispatchData& kd) const {
+ const DispatchData& dispatchData) const {
JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
const auto& padding = params.conv.padding;
const auto& input = params.inputs[0];
std::vector<uint32_t> unrollLoopParams{params.conv.filterSize.x,
params.conv.filterSize.y,
params.conv.filterSize.z,
- (uint32_t)kd.gemmStyle.globalWorkSizeDX,
- (uint32_t)kd.gemmStyle.globalWorkSizeDY,
- (uint32_t)kd.gemmStyle.globalWorkSizeDZ,
- (uint32_t)kd.gemmStyle.subBlockDimM,
- (uint32_t)kd.gemmStyle.subBlockDimK,
- (uint32_t)kd.gemmStyle.subBlockDimN};
+ (uint32_t)dispatchData.gemmStyle.globalWorkSizeDX,
+ (uint32_t)dispatchData.gemmStyle.globalWorkSizeDY,
+ (uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ,
+ (uint32_t)dispatchData.gemmStyle.subBlockDimM,
+ (uint32_t)dispatchData.gemmStyle.subBlockDimK,
+ (uint32_t)dispatchData.gemmStyle.subBlockDimN};
auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end());
return mem_consts;
}
-bool fused_conv_eltwise_kernel_base::CheckWorkGroups(const fused_conv_eltwise_kernel_base::DispatchData& kd) {
- if (kd.gws0 == 0 || kd.gws1 == 0 || kd.gws2 == 0 || kd.lws0 == 0 || kd.lws1 == 0 || kd.lws2 == 0) {
+bool fused_conv_eltwise_kernel_base::CheckWorkGroups(const fused_conv_eltwise_kernel_base::DispatchData& dispatchData) {
+ if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3)
return false;
- }
- if ((kd.gws0 % kd.lws0) != 0 || (kd.gws1 % kd.lws1) != 0 || (kd.gws2 % kd.lws2) != 0) {
- return false;
+ for (size_t i = 0; i < dispatchData.gws.size(); i++) {
+ if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0)
+ return false;
+ if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0)
+ return false;
}
return true;
fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_base::SetDefault(
const fused_conv_eltwise_params& params,
int) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& out = params.output;
- kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
- std::vector<size_t> global;
+
if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf ||
params.output.GetLayout() == DataLayout::bfzyx || params.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 ||
params.output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16) {
- global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
+ dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
} else {
- global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v * out.Z().v };
+ dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v * out.Z().v };
}
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- kd.cldnnStyle.blockWidth = 1;
- kd.cldnnStyle.blockHeight = 1;
- kd.cldnnStyle.prefetch = 0;
- kd.cldnnStyle.inputBlockArraySize = 0;
- kd.cldnnStyle.inputBlockWidth = 0;
-
- kd.gemmStyle.globalWorkSizeDX = 1;
- kd.gemmStyle.globalWorkSizeDY = 1;
- kd.gemmStyle.globalWorkSizeDZ = 1;
- kd.gemmStyle.subBlockDimK = 1;
- kd.gemmStyle.subBlockDimM = 0;
- kd.gemmStyle.subBlockDimN = 0;
- kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- return kd;
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
+
+ dispatchData.cldnnStyle.blockWidth = 1;
+ dispatchData.cldnnStyle.blockHeight = 1;
+ dispatchData.cldnnStyle.prefetch = 0;
+ dispatchData.cldnnStyle.inputBlockArraySize = 0;
+ dispatchData.cldnnStyle.inputBlockWidth = 0;
+
+ dispatchData.gemmStyle.globalWorkSizeDX = 1;
+ dispatchData.gemmStyle.globalWorkSizeDY = 1;
+ dispatchData.gemmStyle.globalWorkSizeDZ = 1;
+ dispatchData.gemmStyle.subBlockDimK = 1;
+ dispatchData.gemmStyle.subBlockDimM = 0;
+ dispatchData.gemmStyle.subBlockDimN = 0;
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ return dispatchData;
}
KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& params,
if (NeedPaddedInput()) {
kd.reorderInput = CovolutionUpdateInputParams(newParams);
}
- DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
+ DispatchData dispatchData = SetDefault(newParams, autoTuneIndex);
- if (!CheckWorkGroups(runInfo)) {
+ if (!CheckWorkGroups(dispatchData)) {
// Internal Error - wrong calculation of global/local work group sizes
return {};
}
}
auto finalKernelName = GetKernelName(newParams);
- auto cldnnJit = GetJitConstants(newParams, runInfo);
+ auto cldnnJit = GetJitConstants(newParams, dispatchData);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
finalKernelName,
jit,
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
}
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
kd.autoTuneIndex = autoTuneIndex;
return {kd};
virtual std::string GetKernelName(const fused_conv_eltwise_params&) const { return kernelName; }
virtual bool NeedPaddedInput() const { return false; }
bool Validate(const Params& p, const optional_params& o) const override;
- virtual JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const;
virtual DispatchData SetDefault(const fused_conv_eltwise_params& params, int autoTuneIndex = -1) const;
static bool CheckWorkGroups(const DispatchData&);
static bool CheckPitchForSplitOnly(const fused_conv_eltwise_params& params);
fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_1x1_opt::SetDefault(
const fused_conv_eltwise_params& arg,
int) const {
- DispatchData runInfo = Parent::SetDefault(arg);
+ DispatchData dispatchData = Parent::SetDefault(arg);
constexpr size_t sub_group_size = 8;
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
auto block = get_out_block_size(arg);
- runInfo.gws0 = arg.output.X().v / block.out_width;
- runInfo.gws1 = arg.output.Y().v / block.out_height;
- runInfo.gws2 = 2 * (arg.output.Feature().v * arg.output.Batch().v) /
- block.out_depth; // process 8 output channels per Workitem
+ dispatchData.gws[0] = arg.output.X().v / block.out_width;
+ dispatchData.gws[1] = arg.output.Y().v / block.out_height;
+ dispatchData.gws[2] = 2 * (arg.output.Feature().v * arg.output.Batch().v) /
+ block.out_depth; // process 8 output channels per Workitem
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 2 * sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 2 * sub_group_size;
- return runInfo;
+ return dispatchData;
}
JitConstants fused_conv_eltwise_kernel_bfyx_1x1_opt::GetJitConstants(const fused_conv_eltwise_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
auto block = get_out_block_size(params);
jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width));
WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params &) const override;
std::string GetKernelName(const fused_conv_eltwise_params& params) const override;
bool NeedPaddedInput() const override { return true; }
- JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_iyxo::SetDefault(
const fused_conv_eltwise_params& cp,
int) const {
- DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(cp);
+ DispatchData dispatchData = fused_conv_eltwise_kernel_base::SetDefault(cp);
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
- runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 4 / 2;
- runInfo.gws1 = cp.output.Y().v / 2;
- runInfo.gws2 = sub_group_size;
+ dispatchData.gws[0] = CeilDiv(cp.output.X().v, sub_group_size) / 4 / 2;
+ dispatchData.gws[1] = cp.output.Y().v / 2;
+ dispatchData.gws[2] = sub_group_size;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool fused_conv_eltwise_kernel_bfyx_iyxo::Validate(const Params& p, const optional_params& o) const {
}
JitConstants fused_conv_eltwise_kernel_bfyx_iyxo::GetJitConstants(const fused_conv_eltwise_params& params,
- const DispatchData& runInfo) const {
- auto jit = Parent::GetJitConstants(params, runInfo);
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2]));
return jit;
}
WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params&) const override {
return WeightsLayout::iyxo;
}
- JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
option.blockWidth = 4;
option.blockHeight = 3;
option.prefetch = 5;
- // run_info.efficiency = FORCE_PRIORITY_7; // GEMM is better
}
// if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block
fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::SetDefault(
const fused_conv_eltwise_params& cp,
int autoTuneIndex) const {
- DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(cp);
+ DispatchData dispatchData = fused_conv_eltwise_kernel_base::SetDefault(cp);
const auto of_maps = cp.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
- runInfo.efficiency = FORCE_PRIORITY_3;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
- runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
- runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
- runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+ dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+ dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+ dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch;
- auto input_block_dims = get_bfyx_req_input_block_dims(runInfo.cldnnStyle.blockWidth,
- runInfo.cldnnStyle.blockHeight,
+ auto input_block_dims = get_bfyx_req_input_block_dims(dispatchData.cldnnStyle.blockWidth,
+ dispatchData.cldnnStyle.blockHeight,
cp.conv.filterSize,
cp.conv.stride,
cp.conv.dilation,
sub_group_size,
- runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2,
+ cp.output.GetDType() == Datatype::F16 ? sub_group_size : sub_group_size / 2,
sub_group_size);
- runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first;
- runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second;
+ dispatchData.cldnnStyle.inputBlockArraySize = input_block_dims.first;
+ dispatchData.cldnnStyle.inputBlockWidth = input_block_dims.second;
- runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
- runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight);
- runInfo.gws2 = of_threads_per_batch * cp.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth);
+ dispatchData.gws[1] = CeilDiv(cp.output.Y().v, dispatchData.cldnnStyle.blockHeight);
+ dispatchData.gws[2] = of_threads_per_batch * cp.output.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = sub_group_size;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = sub_group_size;
- return runInfo;
+ return dispatchData;
}
bool fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetJitConstants(const fused_conv_eltwise_params& params,
- const DispatchData& runInfo) const {
+ const DispatchData& dispatchData) const {
const auto of_maps = params.output.Feature().v;
const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
size_t leftovers = of_threads_per_batch - of_maps;
- auto jit = Parent::GetJitConstants(params, runInfo);
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth));
- jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight));
- jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize));
- jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth));
- jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch));
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2]));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth));
+ jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight));
+ jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", dispatchData.cldnnStyle.inputBlockArraySize));
+ jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth));
+ jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch));
if (leftovers) {
jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers));
protected:
WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params &) const override;
- JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override;
bool Validate(const Params& p, const optional_params& o) const override;
bool NeedPaddedInput() const override { return true; }
DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
std::vector<AutoTuneOption> autoTuneOptions = {};
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_yxfb_yxio_b16::SetDefault(
const fused_conv_eltwise_params& arg,
int) const {
- DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(arg);
+ DispatchData dispatchData = fused_conv_eltwise_kernel_base::SetDefault(arg);
const auto filter_ofm_num = arg.weights.OFM().v;
const auto batch_size = arg.output.Batch().v;
const size_t ofmPerWorkItem = GetOfmPerWorkitem(arg.inputs[0].GetDType());
if (arg.inputs[0].GetDType() == Datatype::F16) {
- runInfo.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
} else {
- runInfo.efficiency = FORCE_PRIORITY_9;
+ dispatchData.efficiency = FORCE_PRIORITY_9;
}
- runInfo.lws0 = min_lws;
- runInfo.gws0 = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem);
+ dispatchData.lws[0] = min_lws;
+ dispatchData.gws[0] = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem);
- return runInfo;
+ return dispatchData;
}
bool fused_conv_eltwise_kernel_yxfb_yxio_b16::Validate(const Params& p, const optional_params& o) const {
}
JitConstants fused_conv_eltwise_kernel_yxfb_yxio_b16::GetJitConstants(const fused_conv_eltwise_params& params,
- const DispatchData& kd) const {
- auto jit = Parent::GetJitConstants(params, kd);
+ const DispatchData& dispatchData) const {
+ auto jit = Parent::GetJitConstants(params, dispatchData);
- const auto local_work_group_size = kd.lws0;
+ const auto local_work_group_size = dispatchData.lws[0];
const auto batch_size = params.output.Batch().v;
if (params.inputs[0].GetDType() == Datatype::F32) {
const size_t ofmPerWorkItem = GetOfmPerWorkitem(params.inputs[0].GetDType());
jit.AddConstants({
- MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0),
+ MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]),
MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem),
MakeJitConstant("BATCHES_PER_WORK_ITEM",
batchesPerWorkItem), // how many batches will a single work item compute
}
std::string GetKernelName(const fused_conv_eltwise_params&) const override;
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override;
DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
} else if (size == 6) {
idx_order = {"b", "f", "w", "z", "y", "x"};
}
-
+
return idx_order;
}
for (size_t i = dictionary_dims_num; i < idx_order.size(); i++)
idx_order[i] = zeroVal;
-
+
// Fix size to inputs[0] dims size
for (size_t i = 0; i < params.output.GetDims().size() - params.inputs[0].GetDims().size(); i++)
idx_order.pop_back();
}
CommonDispatchData GatherKernelRef::SetDefault(const gather_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
const auto& output = params.output;
- std::vector<size_t> global;
- std::vector<size_t> local;
-
if (output.GetLayout() == DataLayout::bfyx) {
- global = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v};
+ dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v};
} else if (output.GetLayout() == DataLayout::bfzyx) {
- global = {output.X().v, output.Y().v * output.Z().v, output.Feature().v * output.Batch().v};
+ dispatchData.gws = {output.X().v, output.Y().v * output.Z().v, output.Feature().v * output.Batch().v};
} else {
- global = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
+ dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
}
- local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- return runInfo;
+ return dispatchData;
}
JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const {
KernelData kd = KernelData::Default<gather_params>(params);
gather_params& newParams = *static_cast<gather_params*>(kd.params.get());
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params));
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params));
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
GatherTreeKernelBase::DispatchData GatherTreeKernelBase::SetDefault(const gather_tree_params & params) const {
- std::vector<size_t> global{
- params.output.Y().v, // beam
- params.output.Feature().v, // batch
- 1
- };
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ DispatchData dispatchData;
/*
b -> time
f -> batch
y -> beam
*/
- DispatchData data;
- data.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
- data.gws0 = global[0];
- data.gws1 = global[1];
- data.gws2 = global[2];
- data.lws0 = local[0];
- data.lws1 = local[1];
- data.lws2 = local[2];
- return data;
+ dispatchData.gws = { params.output.Y().v, // beam
+ params.output.Feature().v, // batch
+ 1 };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
+ return dispatchData;
}
KernelsData GatherTreeKernelBase::GetCommonKernelsData(const Params& params,
- const optional_params& options,
- float estimated_time) const {
+ const optional_params& options,
+ float estimated_time) const {
assert(params.GetType() == KernelType::GATHER_TREE);
const auto& gt_params = static_cast<const gather_tree_params&>(params);
- auto run_info = SetDefault(gt_params);
+ auto dispatchData = SetDefault(gt_params);
auto kernel_data = KernelData::Default<gather_tree_params>(params);
auto cldnn_jit = GetJitConstants(gt_params);
auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
FillCLKernelData(kernel_data.kernels[0],
- run_info,
- params.engineInfo,
- kernelName,
- jit,
- entry_point,
- DEFAULT,
- false,
- false,
- static_cast<int>(gt_params.inputs.size()));
+ dispatchData,
+ params.engineInfo,
+ kernelName,
+ jit,
+ entry_point,
+ DEFAULT,
+ false,
+ false,
+ static_cast<int>(gt_params.inputs.size()));
kernel_data.estimatedTime = estimated_time;
return { kernel_data };
}
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
GemmKernelBase::DispatchData GemmKernelBase::SetDefault(const gemm_params& params) const {
const auto& output = params.output;
- DispatchData kd;
-
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ DispatchData dispatchData;
auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v);
- std::vector<size_t> global = { output.X().v, output.Y().v, total_batches };
-
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.gws = { output.X().v, output.Y().v, total_batches };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- return kd;
+ return dispatchData;
}
KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params,
const auto& prim_params = static_cast<const gemm_params&>(params);
- auto run_info = SetDefault(prim_params);
+ auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<gemm_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto& kernel = k_data.kernels[0];
FillCLKernelData(kernel,
- run_info,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
virtual DispatchData SetDefault(const gemm_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
// Fused ops
- virtual JitConstants GetFusedPrimitivesJitConstants(const gemm_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetFusedPrimitivesJitConstants(const gemm_params& params, const DispatchData& dispatchData) const;
Datatype GetActivationType(const gemm_params& params) const;
// --Fused ops
const auto& output = params.output;
auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v);
- DispatchData kd;
+ DispatchData dispatchData;
GemmTuningData td = SetTuningParams(params);
- std::vector<size_t> global = { Align(output.X().v, td.simd_size),
- Align(output.Y().v, td.simd_size * td.tile_num) / (td.simd_size * td.tile_num),
- total_batches };
+ dispatchData.gws = { Align(output.X().v, td.simd_size),
+ Align(output.Y().v, td.simd_size * td.tile_num) / (td.simd_size * td.tile_num),
+ total_batches };
+ dispatchData.lws = { td.simd_size, 1, 1 };
- std::vector<size_t> local = { td.simd_size, 1, 1 };
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
GemmKernelMMADint8::GemmTuningData GemmKernelMMADint8::InitGemmTuningData(const gemm_params& params) const {
const auto& prim_params = static_cast<const gemm_params&>(params);
- auto run_info = GemmKernelMMADint8::SetDefault(prim_params);
+ auto dispatchData = GemmKernelMMADint8::SetDefault(prim_params);
KernelData k_data = KernelData::Default<gemm_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto& kernel = k_data.kernels[0];
FillCLKernelData(kernel,
- run_info,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
const auto& output = params.output;
auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v);
- DispatchData kd;
+ DispatchData dispatchData;
GemmTuningData td = SetTuningParams(params);
- std::vector<size_t> global = { td.size_n / td.pack_size, output.Y().v / td.simd_size, total_batches };
- std::vector<size_t> local = { td.slm_tile_size / td.pack_size, td.slm_tile_size / td.simd_size, 1 };
+ dispatchData.gws = { td.size_n / td.pack_size, output.Y().v / td.simd_size, total_batches };
+ dispatchData.lws = { td.slm_tile_size / td.pack_size, td.slm_tile_size / td.simd_size, 1 };
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
GemmKernelMMADslmInt8::GemmTuningData GemmKernelMMADslmInt8::InitGemmTuningData(const gemm_params& params) const {
const auto& prim_params = static_cast<const gemm_params&>(params);
- auto run_info = GemmKernelMMADslmInt8::SetDefault(prim_params);
+ auto dispatchData = GemmKernelMMADslmInt8::SetDefault(prim_params);
KernelData k_data = KernelData::Default<gemm_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto& kernel = k_data.kernels[0];
FillCLKernelData(kernel,
- run_info,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
GemmKernelBase::DispatchData GemmKernelTiledOpt::SetDefault(const gemm_params& params) const {
const auto& output = params.output;
- DispatchData kd;
+ DispatchData dispatchData;
GemmTuningData td = SetTuningParams(params);
auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v);
std::vector<size_t> global = { output.X().v, output.Y().v, total_batches };
- kd.gws0 = Align(global[0], td.tile_n_size) / (td.tile_n_size / td.simd_size);
- kd.gws1 = Align(global[1], td.tile_m_size) / td.tile_m_size;
- kd.gws2 = global[2];
+ dispatchData.gws[0] = Align(global[0], td.tile_n_size) / (td.tile_n_size / td.simd_size);
+ dispatchData.gws[1] = Align(global[1], td.tile_m_size) / td.tile_m_size;
+ dispatchData.gws[2] = global[2];
- kd.lws0 = td.simd_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = td.simd_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gemm_params& params) const {
GRNKernelBase::DispatchData GRNKernelBase::SetDefault(const grn_params& params) const {
const auto& output = params.output;
- DispatchData kd;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ DispatchData dispatchData;
+ dispatchData.gws = { output.Batch().v, output.Y().v, output.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- std::vector<size_t> global = { output.Batch().v, output.Y().v, output.X().v };
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData GRNKernelBase::GetCommonKernelsData(const Params& params,
const grn_params& orgParams = static_cast<const grn_params&>(params);
- DispatchData runInfo;
-
- runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<grn_params>(params);
- auto cldnn_jit = GetJitConstants(orgParams, runInfo);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
using DispatchData = CommonDispatchData;
protected:
- virtual JitConstants GetJitConstants(const grn_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const grn_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const grn_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
};
}
CommonDispatchData LRNKernelAcrossChannelMultipleFeatures::SetDefault(const lrn_params& params) const {
- CommonDispatchData runInfo = LRNKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params);
const auto& input = params.inputs[0];
unsigned int ofm_per_simd = GetOfmPerSimd(params);
const auto& out = params.output;
const unsigned int alignment = out.X().v > 16 ? 32 : 16;
- runInfo.gws0 = Align(out.X().v, alignment);
- runInfo.gws1 = out.Y().v;
- runInfo.gws2 = (out.Feature().v * out.Batch().v) / ofm_per_simd;
+ dispatchData.gws[0] = Align(out.X().v, alignment);
+ dispatchData.gws[1] = out.Y().v;
+ dispatchData.gws[2] = (out.Feature().v * out.Batch().v) / ofm_per_simd;
- runInfo.lws0 = alignment;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = alignment;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
} else if (input.GetLayout() == DataLayout::yxfb) {
- runInfo.gws0 /= ofm_per_simd;
- runInfo.lws0 = std::min(std::max(runInfo.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (runInfo.gws0 % runInfo.lws0 != 0) {
- --runInfo.lws0;
+ dispatchData.gws[0] /= ofm_per_simd;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
}
- runInfo.efficiency = FORCE_PRIORITY_6;
+ dispatchData.efficiency = FORCE_PRIORITY_6;
- return runInfo;
+ return dispatchData;
}
bool LRNKernelAcrossChannelMultipleFeatures::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants LRNKernelAcrossChannelMultipleFeatures::GetJitConstants(const lrn_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants LRNKernelAcrossChannelMultipleFeatures::GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const auto& input = params.inputs[0];
const auto& input_dt = params.inputs[0].GetDType();
const auto& output = params.output;
FusedOpType::ACTIVATION };
}
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
}
CommonDispatchData LRNKernelAcrossChannelMultipleFeaturesFSV16::SetDefault(const lrn_params& params) const {
- CommonDispatchData runInfo = LRNKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params);
const auto& out = params.output;
const unsigned int alignment = 16;
- std::vector<size_t> global = {Align(out.Feature().v, alignment),
- out.X().v,
- out.Y().v * out.Batch().v};
+ dispatchData.gws = { Align(out.Feature().v, alignment),
+ out.X().v,
+ out.Y().v * out.Batch().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.efficiency = FORCE_PRIORITY_6;
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- runInfo.efficiency = FORCE_PRIORITY_6;
-
- return runInfo;
+ return dispatchData;
}
-JitConstants LRNKernelAcrossChannelMultipleFeaturesFSV16::GetJitConstants(const lrn_params& params, const DispatchData& kd) const {
- JitConstants jit = LRNKernelBase::GetJitConstants(params, kd);
+JitConstants LRNKernelAcrossChannelMultipleFeaturesFSV16::GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = LRNKernelBase::GetJitConstants(params, dispatchData);
const auto& input_dt = params.inputs[0].GetDType();
if (!params.fused_ops.empty()) {
private:
DispatchData SetDefault(const lrn_params& params) const override;
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
}
CommonDispatchData LRNKernelAcrossChannel_b8::SetDefault(const lrn_params& params) const {
- CommonDispatchData run_info = LRNKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params);
- run_info.gws0 /= 8;
- run_info.lws0 = 8; // gws0 is dividable by 64, so after correction it will be dividable by 8.
+ dispatchData.gws[0] /= 8;
+    dispatchData.lws[0] = 8; // gws[0] is divisible by 64, so after correction it will be divisible by 8.
- return run_info;
+ return dispatchData;
}
bool LRNKernelAcrossChannel_b8::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants LRNKernelAcrossChannel_b8::GetJitConstants(const lrn_params& params, const DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+JitConstants LRNKernelAcrossChannel_b8::GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const auto& input_dt = params.inputs[0].GetDType();
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 8));
FusedOpType::ACTIVATION };
}
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
}
CommonDispatchData LRNKernelAcrossChannelRef::SetDefault(const lrn_params& params) const {
- CommonDispatchData runInfo = LRNKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params);
if (params.inputs[0].GetLayout() == DataLayout::bfyx) {
const auto& out = params.output;
- runInfo.gws0 = Align(out.X().v, 32);
- runInfo.gws1 = out.Y().v;
- runInfo.gws2 = out.Feature().v * out.Batch().v;
+ dispatchData.gws[0] = Align(out.X().v, 32);
+ dispatchData.gws[1] = out.Y().v;
+ dispatchData.gws[2] = out.Feature().v * out.Batch().v;
- runInfo.lws0 = 32;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
- return runInfo;
+ return dispatchData;
}
JitConstants LRNKernelAcrossChannelRef::GetJitConstants(const lrn_params& params,
- const LRNKernelBase::DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+ const LRNKernelBase::DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const auto& input_dt = params.inputs[0].GetDType();
if (!params.fused_ops.empty()) {
FusedOpType::SCALE,
FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return true;
}
-JitConstants LRNKernelBase::GetJitConstants(const lrn_params& params, const LRNKernelBase::DispatchData& kd) const {
+JitConstants LRNKernelBase::GetJitConstants(const lrn_params& params, const LRNKernelBase::DispatchData& /*dispatchData*/) const {
JitConstants mem_consts = MakeBaseParamsJitConstants(params);
const auto padding = (params.localSize - 1) / 2;
auto alpha_div_by_size_abs_sqrt = std::sqrt(std::abs(alpha_div_by_size));
mem_consts.AddConstants({
- MakeJitConstant("ALPHA_AFTER_FACTORED", kd.fp16UnitUsed ? alpha_sign : alpha),
- MakeJitConstant("ALPHA_DIV_BY_SIZE", kd.fp16UnitUsed ? alpha_sign : alpha_div_by_size),
- MakeJitConstant("ALPHA_VAL_FACTOR", kd.fp16UnitUsed ? alpha_abs_sqrt : 1.0f),
- MakeJitConstant("ALPHA_VAL_FACTOR_DIV_BY_SIZE", kd.fp16UnitUsed ? alpha_div_by_size_abs_sqrt : 1.0f),
+ MakeJitConstant("ALPHA_AFTER_FACTORED", params.inputs[0].GetDType() == Datatype::F16 ? alpha_sign : alpha),
+ MakeJitConstant("ALPHA_DIV_BY_SIZE", params.inputs[0].GetDType() == Datatype::F16 ? alpha_sign : alpha_div_by_size),
+ MakeJitConstant("ALPHA_VAL_FACTOR", params.inputs[0].GetDType() == Datatype::F16 ? alpha_abs_sqrt : 1.0f),
+ MakeJitConstant("ALPHA_VAL_FACTOR_DIV_BY_SIZE", params.inputs[0].GetDType() == Datatype::F16 ? alpha_div_by_size_abs_sqrt : 1.0f),
});
return mem_consts;
LRNKernelBase::DispatchData LRNKernelBase::SetDefault(const lrn_params& params) const {
const auto& output = params.output;
- DispatchData kd;
+ DispatchData dispatchData;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
// Determine global work sizes.
- kd.gws0 = output.Batch().v * output.Feature().v; // B, F
- kd.gws1 = output.X().v; // X
- kd.gws2 = output.Y().v; // Y
+ dispatchData.gws[0] = output.Batch().v * output.Feature().v; // B, F
+ dispatchData.gws[1] = output.X().v; // X
+ dispatchData.gws[2] = output.Y().v; // Y
// Find largest positive local work size that is divider for global work size.
- kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (kd.gws0 % kd.lws0 != 0) {
- --kd.lws0;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
KernelsData LRNKernelBase::GetCommonKernelsData(const Params& params,
const lrn_params& orgParams = static_cast<const lrn_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<lrn_params>(params);
- auto cldnnJit = GetJitConstants(orgParams, runInfo);
+ auto cldnnJit = GetJitConstants(orgParams, dispatchData);
auto entryPoint = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
auto fused_deps_total = GetFusedPrimitiveInputsCount(params);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- virtual JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const;
+ virtual JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const;
virtual DispatchData SetDefault(const lrn_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
};
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return k;
}
-JitConstants LRNKernelRef::GetJitConstants(const lrn_params& params, const LRNKernelRef::Parent::DispatchData& kd) const {
+JitConstants LRNKernelRef::GetJitConstants(const lrn_params& params, const LRNKernelRef::Parent::DispatchData& dispatchData) const {
const uint32_t round_norm_size = (params.localSize / 2) * 2 + 1;
uint32_t numElement = round_norm_size * round_norm_size;
const auto& input_dt = params.inputs[0].GetDType();
const float num_element_div = 1.f / static_cast<float>(numElement);
- JitConstants jit = Parent::GetJitConstants(params, kd);
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstants({
MakeJitConstant("NUM_ELEMENTS_DIV", num_element_div),
MakeJitConstant("GWS_BATCH", 2),
}
LRNKernelRef::Parent::DispatchData LRNKernelRef::SetDefault(const lrn_params& params) const {
- DispatchData kd = Parent::SetDefault(params);
+ DispatchData dispatchData = Parent::SetDefault(params);
const auto& out = params.output;
- std::vector<size_t> global = {out.X().v * out.Y().v, out.Feature().v, out.Batch().v};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { out.X().v * out.Y().v, out.Feature().v, out.Batch().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData LRNKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
FusedOpType::SCALE,
FusedOpType::ACTIVATION };
}
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return k;
}
-JitConstants LRNKernelWithinChannelByxfOpt::GetJitConstants(
- const lrn_params& params,
- const LRNKernelBase::DispatchData& kd) const {
+JitConstants LRNKernelWithinChannelByxfOpt::GetJitConstants(const lrn_params& params,
+ const LRNKernelBase::DispatchData& dispatchData) const {
const uint32_t round_norm_size = (params.localSize / 2) * 2 + 1;
uint32_t numElement = round_norm_size * round_norm_size;
const auto& input_dt = params.inputs[0].GetDType();
const float num_element_div = 1.f / static_cast<float>(numElement);
- JitConstants jit = Parent::GetJitConstants(params, kd);
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
jit.AddConstants({
MakeJitConstant("NUM_ELEMENTS_DIV", num_element_div),
MakeJitConstant("GWS_BATCH", 2),
LRNKernelWithinChannelByxfOpt::Parent::DispatchData LRNKernelWithinChannelByxfOpt::SetDefault(
const lrn_params& params) const {
- DispatchData kd = Parent::SetDefault(params);
+ DispatchData dispatchData = Parent::SetDefault(params);
const auto& out = params.output;
- std::vector<size_t> global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, 8), out.Batch().v};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { out.X().v * out.Y().v, CeilDiv(out.Feature().v, 8), out.Batch().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
bool LRNKernelWithinChannelByxfOpt::Validate(const Params& p, const optional_params& o) const {
FusedOpType::ACTIVATION };
}
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData LRNKernelWithinChannel::SetDefault(const lrn_params& params) const {
- CommonDispatchData runInfo = LRNKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params);
- runInfo.gws0 = 128 * 128;
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = 128 * 128;
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = 128;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 128;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
JitConstants LRNKernelWithinChannel::GetJitConstants(const lrn_params& params,
- const LRNKernelWithinChannel::Parent::DispatchData& kd) const {
- JitConstants jit = Parent::GetJitConstants(params, kd);
+ const LRNKernelWithinChannel::Parent::DispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
const auto& input_dt = params.inputs[0].GetDType();
if (!params.fused_ops.empty()) {
FusedOpType::ACTIVATION };
}
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData LRNKernelWithinChannelOpt::SetDefault(const lrn_params& params) const {
- CommonDispatchData runInfo = LRNKernelBase::SetDefault(params);
+ CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params);
const auto totalSize = params.inputs[0].LogicalSize();
const unsigned work_group_size = (totalSize < 128) ? 32 : 128;
- runInfo.gws0 = Align(params.inputs[0].LogicalSize(), work_group_size);
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = Align(params.inputs[0].LogicalSize(), work_group_size);
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = work_group_size;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = work_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return runInfo;
+ return dispatchData;
}
bool LRNKernelWithinChannelOpt::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants LRNKernelWithinChannelOpt::GetJitConstants(const lrn_params& params, const LRNKernelWithinChannelOpt::Parent::DispatchData& kd) const {
+JitConstants LRNKernelWithinChannelOpt::GetJitConstants(const lrn_params& params, const LRNKernelWithinChannelOpt::Parent::DispatchData& dispatchData) const {
const auto& input_dt = params.inputs[0].GetDType();
- JitConstants jit = Parent::GetJitConstants(params, kd);
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
FusedOpsConfiguration conf = {"", {"batch_id", "feature_id", "y", "x"}, "lrn_result", input_dt, 1};
FusedOpType::ACTIVATION };
}
bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override;
+ JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override;
};
} // namespace kernel_selector
/*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return {};
}
- DispatchData run_info;
+ DispatchData dispatchData;
KernelData kd = KernelData::Default<lstm_dynamic_input_params>(params);
lstm_dynamic_input_params& dlstm_params = *static_cast<lstm_dynamic_input_params*>(kd.params.get());
const auto& out = dlstm_params.output;
auto hidden_size = out.X().v;
- std::vector<size_t> global = { hidden_size / simd_size, out.Batch().v * out.Y().v, out.Feature().v };
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- run_info.gws0 = global[0];
- run_info.gws1 = global[1];
- run_info.gws2 = global[2];
-
- run_info.lws0 = local[0];
- run_info.lws1 = local[1];
- run_info.lws2 = local[2];
-
- run_info.fp16UnitUsed = dlstm_params.inputs[0].GetDType() == Datatype::F16;
+ dispatchData.gws = { hidden_size / simd_size, out.Batch().v * out.Y().v, out.Feature().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
bool succeed = UpdateWeightsParams(dlstm_params,
options,
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- kernel.workGroups.global = { run_info.gws0, run_info.gws1, run_info.gws2 };
- kernel.workGroups.local = { run_info.lws0, run_info.lws1, run_info.lws2 };
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
SetKernelArguments(dlstm_params, kernel);
/*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
LSTM_DynamicInputKernelBase::DispatchData LSTM_DynamicInputKernelBase::SetDefault(
const lstm_dynamic_input_params& params) {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& out = params.output;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
// 4 * hidden, batch * dir, seq_len
- std::vector<size_t> global = {out.X().v, out.Batch().v * out.Y().v, out.Feature().v};
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { out.X().v, out.Batch().v * out.Y().v, out.Feature().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
void kernel_selector::LSTM_DynamicInputKernelBase::SetKernelArguments(const lstm_dynamic_input_params& params, clKernelData& kernel) const {
const lstm_dynamic_input_params& orgParams = static_cast<const lstm_dynamic_input_params&>(params);
- auto run_info = SetDefault(orgParams);
+ auto dispatchData = SetDefault(orgParams);
KernelData k_data = KernelData::Default<lstm_dynamic_input_params>(params, 1);
auto cldnn_jit = GetJitConstants(orgParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
- kernel.workGroups.global = {run_info.gws0, run_info.gws1, run_info.gws2};
+ kernel.workGroups.global = dispatchData.gws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
SetKernelArguments(orgParams, kernel);
/*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
LSTM_DynamicTimeloopKernelBase::DispatchData LSTM_DynamicTimeloopKernelBase::SetDefault(
const lstm_dynamic_timeloop_params& params) {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& out = params.output;
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
auto out_x_size = out.X().v;
auto gws0 = out_x_size > 256 ? 256 : out_x_size;
- std::vector<size_t> global = {gws0, out.Batch().v, static_cast<size_t>(params.direction)};
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { gws0, out.Batch().v, static_cast<size_t>(params.direction) };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
void kernel_selector::LSTM_DynamicTimeloopKernelBase::SetKernelArguments(const lstm_dynamic_timeloop_params& params, clKernelData& kernel) const {
const lstm_dynamic_timeloop_params& org_params = static_cast<const lstm_dynamic_timeloop_params&>(params);
- auto run_info = SetDefault(org_params);
+ auto dispatchData = SetDefault(org_params);
KernelData k_data = KernelData::Default<lstm_dynamic_timeloop_params>(params, 1);
auto cldnn_jit = GetJitConstants(org_params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
- kernel.workGroups.global = {run_info.gws0, run_info.gws1, run_info.gws2};
- kernel.workGroups.local = {run_info.lws0, run_info.lws1, run_info.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
SetKernelArguments(org_params, kernel);
k_data.estimatedTime = estimated_time;
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
MaxUnpoolingKernelBase::DispatchData MaxUnpoolingKernelBase::SetDefault(const max_unpooling_params& params) const {
const auto& input = params.inputs[0];
- DispatchData kd;
+ DispatchData dispatchData;
if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) {
// Determine global work sizes.
- kd.gws2 = input.Batch().v * input.Feature().v; // B, F
- kd.gws0 = Align(input.X().v, 32); // X
- kd.gws1 = input.Y().v; // Y
+ dispatchData.gws[2] = input.Batch().v * input.Feature().v; // B, F
+ dispatchData.gws[0] = Align(input.X().v, 32); // X
+ dispatchData.gws[1] = input.Y().v; // Y
- kd.lws0 = 32;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
} else {
// Determine global work sizes.
- kd.gws0 = input.Batch().v * input.Feature().v; // B, F
- kd.gws1 = input.X().v; // X
- kd.gws2 = input.Y().v; // Y
+ dispatchData.gws[0] = input.Batch().v * input.Feature().v; // B, F
+ dispatchData.gws[1] = input.X().v; // X
+ dispatchData.gws[2] = input.Y().v; // Y
- kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (kd.gws0 % kd.lws0 != 0) {
- --kd.lws0;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
- return kd;
+ return dispatchData;
}
KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
const max_unpooling_params& orgParams = static_cast<const max_unpooling_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<max_unpooling_params>(params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kd.estimatedTime = estimatedTime;
}
MVNKernelBase::DispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefault(const mvn_params& params) const {
- auto kd = Parent::SetDefault(params);
+ auto dispatchData = Parent::SetDefault(params);
auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v;
auto max_wg = params.engineInfo.maxWorkGroupSize;
auto lws = std::max(std::min(items_num, max_lws) / simd, (size_t)1) * simd;
- kd.gws0 = lws;
- kd.gws1 = CeilDiv(params.output.Feature().v, fsv);
- kd.gws2 = params.output.Batch().v;
+ dispatchData.gws[0] = lws;
+ dispatchData.gws[1] = CeilDiv(params.output.Feature().v, fsv);
+ dispatchData.gws[2] = params.output.Batch().v;
- kd.lws0 = lws;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = lws;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.itemsNum = 1;
+ dispatchData.itemsNum = 1;
- return kd;
+ return dispatchData;
}
-JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& params, DispatchData kd) const {
- auto jits = Parent::GetJitConstants(params, kd);
+JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& params, DispatchData dispatchData) const {
+ auto jits = Parent::GetJitConstants(params, dispatchData);
auto activation_dt = GetActivationType(params);
jits.Merge(MakeTypeJitConstants(activation_dt, "MEAN"));
jits.AddConstant(MakeJitConstant("SIMD", simd));
- jits.AddConstant(MakeJitConstant("LWS", kd.lws0));
- jits.AddConstant(MakeJitConstant("GWS", kd.gws0));
- jits.AddConstant(MakeJitConstant("ITEM_GROUPS", kd.itemsNum));
+ jits.AddConstant(MakeJitConstant("LWS", dispatchData.lws[0]));
+ jits.AddConstant(MakeJitConstant("GWS", dispatchData.gws[0]));
+ jits.AddConstant(MakeJitConstant("ITEM_GROUPS", dispatchData.itemsNum));
if (!params.fused_ops.empty()) {
std::vector<std::string> idx_order;
MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti(
const mvn_params& params) const {
- MultiDispatchData md;
+ MultiDispatchData dispatchData;
auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v;
auto max_wg = params.engineInfo.maxWorkGroupSize;
// TODO Check if larger number of work-groups does not provide benefit
size_t item_groups = pref_work_groups;
- md.item_groups = item_groups;
+ dispatchData.item_groups = item_groups;
size_t stage1_lws = lws;
- md.stage_1.gws0 = stage1_lws * item_groups;
- md.stage_1.gws1 = CeilDiv(params.output.Feature().v, fsv);
- md.stage_1.gws2 = params.output.Batch().v;
+ dispatchData.stage_1.gws[0] = stage1_lws * item_groups;
+ dispatchData.stage_1.gws[1] = CeilDiv(params.output.Feature().v, fsv);
+ dispatchData.stage_1.gws[2] = params.output.Batch().v;
- md.stage_1.lws0 = stage1_lws;
- md.stage_1.lws1 = 1;
- md.stage_1.lws2 = 1;
+ dispatchData.stage_1.lws[0] = stage1_lws;
+ dispatchData.stage_1.lws[1] = 1;
+ dispatchData.stage_1.lws[2] = 1;
- md.stage_1.itemsNum = item_groups;
+ dispatchData.stage_1.itemsNum = item_groups;
size_t stage2_lws = std::max(std::min(item_groups, max_lws) / simd, (size_t)1) * simd;
- md.stage_2.gws0 = stage2_lws;
- md.stage_2.gws1 = CeilDiv(params.output.Feature().v, fsv);
- md.stage_2.gws2 = params.output.Batch().v;
+ dispatchData.stage_2.gws[0] = stage2_lws;
+ dispatchData.stage_2.gws[1] = CeilDiv(params.output.Feature().v, fsv);
+ dispatchData.stage_2.gws[2] = params.output.Batch().v;
- md.stage_2.lws0 = stage2_lws;
- md.stage_2.lws1 = 1;
- md.stage_2.lws2 = 1;
+ dispatchData.stage_2.lws[0] = stage2_lws;
+ dispatchData.stage_2.lws[1] = 1;
+ dispatchData.stage_2.lws[2] = 1;
- md.stage_2.itemsNum = item_groups;
+ dispatchData.stage_2.itemsNum = item_groups;
- md.stage_final.gws0 = std::max(items_num / simd, (size_t)1) * simd;
- md.stage_final.gws1 = CeilDiv(params.output.Feature().v, fsv);
- md.stage_final.gws2 = params.output.Batch().v;
+ dispatchData.stage_final.gws[0] = std::max(items_num / simd, (size_t)1) * simd;
+ dispatchData.stage_final.gws[1] = CeilDiv(params.output.Feature().v, fsv);
+ dispatchData.stage_final.gws[2] = params.output.Batch().v;
- md.stage_final.lws0 = simd;
- md.stage_final.lws1 = 1;
- md.stage_final.lws2 = 1;
+ dispatchData.stage_final.lws[0] = simd;
+ dispatchData.stage_final.lws[1] = 1;
+ dispatchData.stage_final.lws[2] = 1;
- md.stage_final.itemsNum = 1;
+ dispatchData.stage_final.itemsNum = 1;
- return md;
+ return dispatchData;
}
KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params,
constexpr size_t intermidiate_bytes = 4;
const mvn_params& orgParams = static_cast<const mvn_params&>(params);
- auto runInfo = SetDefaultForMulti(orgParams);
+ auto dispatchData = SetDefaultForMulti(orgParams);
size_t kernels_num = params.mvnNormalizeVariance ? 5 : 3;
KernelData kd = KernelData::Default<mvn_params>(params, kernels_num);
auto finalKernelName = GetKernelName(orgParams);
{
// Mean first stage
- auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_1);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_1", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo.stage_1,
+ dispatchData.stage_1,
params.engineInfo,
finalKernelName,
jit,
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) *
- runInfo.item_groups * intermidiate_bytes);
+ dispatchData.item_groups * intermidiate_bytes);
}
{
// Mean second stage
- auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_2);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_2", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[1];
FillCLKernelData(kernel,
- runInfo.stage_2,
+ dispatchData.stage_2,
params.engineInfo,
finalKernelName,
jit,
}
if (params.mvnNormalizeVariance) {
// Variance first stage
- auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_1);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_1", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[2];
FillCLKernelData(kernel,
- runInfo.stage_1,
+ dispatchData.stage_1,
params.engineInfo,
finalKernelName,
jit,
}
if (params.mvnNormalizeVariance) {
// Variance second stage
- auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_2);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_2", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[3];
FillCLKernelData(kernel,
- runInfo.stage_2,
+ dispatchData.stage_2,
params.engineInfo,
finalKernelName,
jit,
intermidiate_bytes);
}
{ // Final
- auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_final);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_final);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MAIN", 1));
cldnn_jit.AddConstant(MakeJitConstant("PRECALC_MEAN", 1));
cldnn_jit.AddConstant(MakeJitConstant("PRECALC_VARIANCE", params.mvnNormalizeVariance));
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[kernels_num - 1];
FillCLKernelData(kernel,
- runInfo.stage_final,
+ dispatchData.stage_final,
params.engineInfo,
finalKernelName,
jit,
bool Validate(const Params&, const optional_params&) const override;
DispatchData SetDefault(const mvn_params& params) const override;
- JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const mvn_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {
FusedOpType::ACTIVATION,
MVNKernelBase::DispatchData MVNKernelBase::SetDefault(const mvn_params& params) const {
const auto& output = params.output;
- DispatchData kd;
-
- std::vector<size_t> global(3);
-
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
+ DispatchData dispatchData;
if (params.mvnMode == MVNMode::WITHIN_CHANNELS) {
- global = {output.Batch().v, output.Feature().v, 1};
+ dispatchData.gws = {output.Batch().v, output.Feature().v, 1};
} else {
- global = {output.Batch().v, 1, 1};
+ dispatchData.gws = {output.Batch().v, 1, 1};
}
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params,
const mvn_params& orgParams = static_cast<const mvn_params&>(params);
- DispatchData runInfo;
-
- runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<mvn_params>(params);
auto finalKernelName = GetKernelName(orgParams);
- auto cldnn_jit = GetJitConstants(orgParams, runInfo);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
finalKernelName,
jit,
protected:
bool Validate(const Params&, const optional_params&) const override;
- virtual JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const mvn_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const mvn_params& params) const;
virtual std::string GetKernelName(const mvn_params&) const { return kernelName; }
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
MVNKernelBfyxOpt::Parent::DispatchData MVNKernelBfyxOpt::SetDefault(const mvn_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.inputs[0];
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
if (params.mvnMode == MVNMode::WITHIN_CHANNELS) {
- kd.dataSetSize = input.X().v * input.Y().v * input.Z().v;
- kd.dataSetsCount = input.Batch().v * input.Feature().v;
+ dispatchData.dataSetSize = input.X().v * input.Y().v * input.Z().v;
+ dispatchData.dataSetsCount = input.Batch().v * input.Feature().v;
} else {
- kd.dataSetSize = input.X().v * input.Y().v * input.Z().v * input.Feature().v;
- kd.dataSetsCount = input.Batch().v;
+ dispatchData.dataSetSize = input.X().v * input.Y().v * input.Z().v * input.Feature().v;
+ dispatchData.dataSetsCount = input.Batch().v;
}
// start with 1 thread per data set
- kd.gws0 = 1;
- kd.gws1 = kd.dataSetsCount;
- kd.gws2 = 1;
- kd.itemsNum = kd.dataSetSize;
+ dispatchData.gws[0] = 1;
+ dispatchData.gws[1] = dispatchData.dataSetsCount;
+ dispatchData.gws[2] = 1;
+ dispatchData.itemsNum = dispatchData.dataSetSize;
// We have two units of data per work item in current implementation.
- auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float));
+ auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
// Combining device execution and local memory restrictions to compute maximum possible LWS.
auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi);
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
// Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory
// reads.
- while ((kd.itemsNum > 32 || kd.lws0 < kd.itemsNum) && (2 * kd.lws0 <= max_lws)) {
- kd.lws0 *= 2;
- kd.itemsNum /= 2;
+ while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) {
+ dispatchData.lws[0] *= 2;
+ dispatchData.itemsNum /= 2;
}
- kd.gws0 = kd.lws0;
- kd.leftovers = kd.dataSetSize % kd.lws0;
+ dispatchData.gws[0] = dispatchData.lws[0];
+ dispatchData.leftovers = dispatchData.dataSetSize % dispatchData.lws[0];
- return kd;
+ return dispatchData;
}
-JitConstants MVNKernelBfyxOpt::GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData kd) const {
- auto jit = MVNKernelBase::GetJitConstants(params, kd);
+JitConstants MVNKernelBfyxOpt::GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData dispatchData) const {
+ auto jit = MVNKernelBase::GetJitConstants(params, dispatchData);
jit.AddConstants({
- MakeJitConstant("ITEMS_NUM", kd.itemsNum),
- MakeJitConstant("LWS", kd.lws0),
- MakeJitConstant("GWS", kd.gws0),
- MakeJitConstant("DATA_SETS_COUNT", kd.dataSetsCount),
- MakeJitConstant("DATA_SET_SIZE", kd.dataSetSize),
- MakeJitConstant("LEFTOVERS", kd.leftovers),
+ MakeJitConstant("ITEMS_NUM", dispatchData.itemsNum),
+ MakeJitConstant("LWS", dispatchData.lws[0]),
+ MakeJitConstant("GWS", dispatchData.gws[0]),
+ MakeJitConstant("DATA_SETS_COUNT", dispatchData.dataSetsCount),
+ MakeJitConstant("DATA_SET_SIZE", dispatchData.dataSetSize),
+ MakeJitConstant("LEFTOVERS", dispatchData.leftovers),
});
auto activation_dt = GetActivationType(params);
jit.Merge(MakeTypeJitConstants(activation_dt, "ACTIVATION"));
};
}
DispatchData SetDefault(const mvn_params& params) const override;
- JitConstants GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData kd) const override;
+ JitConstants GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData dispatchData) const override;
};
} // namespace kernel_selector
return k;
}
-JitConstants MVNKernelRef::GetJitConstants(const mvn_params& params, DispatchData kd) const {
- auto jits = Parent::GetJitConstants(params, kd);
+JitConstants MVNKernelRef::GetJitConstants(const mvn_params& params, DispatchData dispatchData) const {
+ auto jits = Parent::GetJitConstants(params, dispatchData);
auto activation_dt = GetActivationType(params);
jits.Merge(MakeTypeJitConstants(activation_dt, "ACTIVATION"));
ParamsKey GetSupportedKey() const override;
protected:
- JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const mvn_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {
FusedOpType::ACTIVATION,
NormalizeKernelBase::DispatchData NormalizeKernelBase::SetDefault(const normalize_params& params) const {
const auto& output = params.output;
- DispatchData kd;
-
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
- std::vector<size_t> global(3);
-
+ DispatchData dispatchData;
if (params.normMode == NormalizeMode::WITHIN_SPATIAL) {
- global = {output.X().v, output.Y().v, output.Batch().v};
+ dispatchData.gws = {output.X().v, output.Y().v, output.Batch().v};
} else {
- global = {output.Batch().v, 1, 1};
+ dispatchData.gws = {output.Batch().v, 1, 1};
}
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params,
const normalize_params& orgParams = static_cast<const normalize_params&>(params);
- DispatchData runInfo;
-
- runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<normalize_params>(params);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
OneHotKernelBase::DispatchData OneHotKernelBase::SetDefault(const one_hot_params& params) {
const auto& input = params.inputs[0];
- DispatchData kd;
-
- kd.fp16UnitUsed = input.GetDType() == Datatype::F16;
-
- std::vector<size_t> global{input.Batch().v, input.Feature().v, input.Y().v * input.X().v};
+ DispatchData dispatchData;
if (params.output.GetDims().size() == 5) {
- global[0] = input.Batch().v;
- global[1] = input.Feature().v * input.Z().v;
- global[2] = input.Y().v * input.X().v;
+ dispatchData.gws = { input.Batch().v, input.Feature().v * input.Z().v, input.Y().v * input.X().v };
+ } else {
+ dispatchData.gws = { input.Batch().v, input.Feature().v, input.Y().v * input.X().v };
}
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- return kd;
+ return dispatchData;
}
KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params,
const auto& prim_params =
static_cast<const one_hot_params&>(params);
- auto run_info = SetDefault(prim_params);
+ auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<one_hot_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
- FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
k_data.estimatedTime = estimated_time;
return {k_data};
}
-JitConstants PoolingKernelBase::GetJitConstants(const pooling_params& pp, PoolingKernelBase::DispatchData kd) const {
+JitConstants PoolingKernelBase::GetJitConstants(const pooling_params& pp, PoolingKernelBase::DispatchData dispatchData) const {
JitConstants mem_consts = MakeBaseParamsJitConstants(pp);
mem_consts.AddConstants({
MakeJitConstant(toString(pp.divMode) + "_KERNEL_DIVIDER", 1),
});
- if (kd.needsBoundary) {
+ if (dispatchData.needsBoundary) {
mem_consts.AddConstant(MakeJitConstant("CHECK_BOUNDRY", 1));
}
PoolingKernelBase::DispatchData PoolingKernelBase::SetDefault(const pooling_params& params) const {
const auto& output = params.output;
- DispatchData kd;
-
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ DispatchData dispatchData;
if (output.GetLayout() == DataLayout::bfyx || output.GetLayout() == DataLayout::b_fs_yx_fsv4 ||
output.GetLayout() == DataLayout::byxf ||
output.GetLayout() == DataLayout::bfzyx || output.GetLayout() == DataLayout::b_fs_zyx_fsv16 ||
output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16) {
// Determine global work sizes.
- kd.gws0 = Align(output.X().v, 32); // X
- kd.gws1 = output.Y().v * output.Z().v; // Y, Z
- kd.gws2 = output.Batch().v * output.Feature().v; // B, F
+ dispatchData.gws[0] = Align(output.X().v, 32); // X
+ dispatchData.gws[1] = output.Y().v * output.Z().v; // Y, Z
+ dispatchData.gws[2] = output.Batch().v * output.Feature().v; // B, F
// Find largest positive local work size that is divider for global work size.
- kd.lws0 = 32;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
} else if (output.GetLayout() == DataLayout::b_fs_yx_fsv32 || output.GetLayout() == DataLayout::b_fs_zyx_fsv32) {
- kd.gws0 = 32;
- kd.gws1 = output.Y().v * output.X().v * output.Z().v;
- kd.gws2 = output.Batch().v * CeilDiv(output.Feature().v, 32);
+ dispatchData.gws[0] = 32;
+ dispatchData.gws[1] = output.Y().v * output.X().v * output.Z().v;
+ dispatchData.gws[2] = output.Batch().v * CeilDiv(output.Feature().v, 32);
- kd.lws0 = 32;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
} else {
// Determine global work sizes.
- kd.gws0 = output.Batch().v * output.Feature().v; // B, F
- kd.gws1 = output.X().v; // X
- kd.gws2 = output.Y().v * output.Z().v; // Y * Z
+ dispatchData.gws[0] = output.Batch().v * output.Feature().v; // B, F
+ dispatchData.gws[1] = output.X().v; // X
+ dispatchData.gws[2] = output.Y().v * output.Z().v; // Y * Z
- kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (kd.gws0 % kd.lws0 != 0) {
- --kd.lws0;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
- kd.needsBoundary = NeedsBoundaryCheck(params);
+ dispatchData.needsBoundary = NeedsBoundaryCheck(params);
- return kd;
+ return dispatchData;
}
KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
const pooling_params& orgParams = static_cast<const pooling_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<pooling_params>(params);
- auto cldnn_jit = GetJitConstants(orgParams, runInfo);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1,
GetFusedPrimitiveInputsCount(params));
if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
protected:
bool Validate(const Params&, const optional_params&) const override;
- virtual JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const pooling_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
Datatype GetAccumulatorType(const pooling_params& p) const;
}
PoolingKernelBase::DispatchData PoolingKernel_b_fs_yx_fsv16::SetDefault(const pooling_params& params) const {
- DispatchData kd = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
const auto& out = params.output;
const size_t alignment = GetSimdSize(params);
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = CeilDiv(x, x_block_size) * y;
- kd.gws1 = Align(f, alignment);
- kd.gws2 = b;
+ dispatchData.gws[0] = CeilDiv(x, x_block_size) * y;
+ dispatchData.gws[1] = Align(f, alignment);
+ dispatchData.gws[2] = b;
- kd.lws0 = 1;
- kd.lws1 = alignment;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = alignment;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_2;
+ dispatchData.efficiency = FORCE_PRIORITY_2;
- return kd;
+ return dispatchData;
}
-JitConstants PoolingKernel_b_fs_yx_fsv16::GetJitConstants(const pooling_params& params, DispatchData runInfo) const {
+JitConstants PoolingKernel_b_fs_yx_fsv16::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
const size_t alignment = GetSimdSize(params);
size_t x_block_size = GetBlockSize(params);
auto input = params.inputs[0];
auto output = params.output;
- auto jit = PoolingKernelBase::GetJitConstants(params, runInfo);
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
size_t input_line_size = params.poolStride.x * (x_block_size - 1) + params.poolSize.x;
protected:
bool Validate(const Params&, const optional_params&) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
DispatchData SetDefault(const pooling_params& params) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
}
PoolingKernelBase::DispatchData PoolingKerneGPU_b_fs_yx_fsv4::SetDefault(const pooling_params& params) const {
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
- runInfo.gws0 = params.output.X().v; // X
- runInfo.gws1 = params.output.Y().v; // Y
+ dispatchData.gws[0] = params.output.X().v; // X
+ dispatchData.gws[1] = params.output.Y().v; // Y
// we got b_fs_yx_fsv4 format, we process 4 features per workitem
- runInfo.gws2 = CeilDiv(params.output.Feature().v, 4) * params.output.Batch().v;
+ dispatchData.gws[2] = CeilDiv(params.output.Feature().v, 4) * params.output.Batch().v;
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes({ runInfo.gws0, runInfo.gws1, runInfo.gws2 }, params.engineInfo);
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
-JitConstants PoolingKerneGPU_b_fs_yx_fsv4::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKerneGPU_b_fs_yx_fsv4::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
const size_t in_x_pitch = 4;
const size_t in_y_pitch = 4 * params.inputs[0].X().LogicalDimPadded();
}
protected:
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
};
} // namespace kernel_selector
}
PoolingKernelBase::DispatchData PoolingKernelGPU_b_fs_zyx_fsv16_imad::SetDefault(const pooling_params& params) const {
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
const auto& out = params.output;
auto x = out.X().v;
auto f = out.Feature().v;
auto b = out.Batch().v;
- runInfo.gws0 = x;
- runInfo.gws1 = y * z;
+ dispatchData.gws[0] = x;
+ dispatchData.gws[1] = y * z;
// we got b_fs_yx_fsv16 format, we process 16 features per workitem
- runInfo.gws2 = CeilDiv(f, FEATURE_SLICE_SIZE) * b;
+ dispatchData.gws[2] = CeilDiv(f, FEATURE_SLICE_SIZE) * b;
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes({ runInfo.gws0, runInfo.gws1, runInfo.gws2 }, params.engineInfo);
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
-JitConstants PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
const size_t in_x_pitch = FEATURE_SLICE_SIZE;
const size_t in_y_pitch = FEATURE_SLICE_SIZE * params.inputs[0].X().LogicalDimPadded();
}
protected:
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
};
} // namespace kernel_selector
PoolingKernelBase::DispatchData PoolingKernelGPUBfyxBlockOpt::SetDefault(const pooling_params& params) const {
const auto& output = params.output;
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
- runInfo.gws1 = CeilDiv(output.Y().v, params.poolSize.y);
+ dispatchData.gws[1] = CeilDiv(output.Y().v, params.poolSize.y);
- return runInfo;
+ return dispatchData;
}
-JitConstants PoolingKernelGPUBfyxBlockOpt::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKernelGPUBfyxBlockOpt::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
jit.AddConstant(
MakeJitConstant("BLOCK_SIZE_Y", params.poolSize.y + params.poolSize.y * params.poolStride.y - 1));
protected:
bool Validate(const Params&, const optional_params&) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
DispatchData SetDefault(const pooling_params& params) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
}
PoolingKernelBase::DispatchData Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::SetDefault(const pooling_params& params) const {
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
- runInfo.gws0 = params.output.Feature().v/16;
- runInfo.gws1 = params.output.X().v * params.output.Y().v;
- runInfo.gws2 = params.output.Batch().v;
+ dispatchData.gws[0] = params.output.Feature().v/16;
+ dispatchData.gws[1] = params.output.X().v * params.output.Y().v;
+ dispatchData.gws[2] = params.output.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = SIMD_SIZE;
- runInfo.efficiency = FORCE_PRIORITY_1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = SIMD_SIZE;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return runInfo;
+ return dispatchData;
}
-JitConstants Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
if (!params.fused_ops.empty()) {
auto input_dt = EnableRound(params) ? Datatype::INT32 : GetActivationType(params);
}
protected:
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
};
} // namespace kernel_selector
}
PoolingKernelBase::DispatchData PoolingKernel_bsv16_fsv16::SetDefault(const pooling_params& params) const {
- DispatchData kd = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
const auto& out = params.output;
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = Align(f, feature_block_size);
- kd.gws1 = x * y * z;
- kd.gws2 = CeilDiv(b, batch_block_size);
+ dispatchData.gws[0] = Align(f, feature_block_size);
+ dispatchData.gws[1] = x * y * z;
+ dispatchData.gws[2] = CeilDiv(b, batch_block_size);
- kd.lws0 = sub_group_size;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = sub_group_size;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- kd.efficiency = FORCE_PRIORITY_1;
+ dispatchData.efficiency = FORCE_PRIORITY_1;
- return kd;
+ return dispatchData;
}
bool PoolingKernel_bsv16_fsv16::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants PoolingKernel_bsv16_fsv16::GetJitConstants(const pooling_params& params, DispatchData runInfo) const {
+JitConstants PoolingKernel_bsv16_fsv16::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
auto input = params.inputs[0];
auto output = params.output;
- auto jit = PoolingKernelBase::GetJitConstants(params, runInfo);
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
jit.AddConstant(MakeJitConstant("OC_BLOCK", feature_block_size));
jit.AddConstant(MakeJitConstant("MB_BLOCK", batch_block_size));
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
DispatchData SetDefault(const pooling_params& params) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
PoolingKernelBase::DispatchData PoolingKernelGPUByxfOpt::SetDefault(const pooling_params& params) const {
const auto& output = params.output;
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
- runInfo.gws2 = output.Batch().v * (CeilDiv(output.Feature().v, 8));
+ dispatchData.gws[2] = output.Batch().v * (CeilDiv(output.Feature().v, 8));
- return runInfo;
+ return dispatchData;
}
-JitConstants PoolingKernelGPUByxfOpt::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKernelGPUByxfOpt::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
protected:
bool Validate(const Params&, const optional_params&) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
DispatchData SetDefault(const pooling_params& params) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
PoolingKernelBase::DispatchData PoolingKernelGPUByxfPaddingOpt::SetDefault(const pooling_params& params) const {
const auto& output = params.output;
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
- runInfo.gws2 = output.Batch().v * (CeilDiv(output.Feature().v, 8));
+ dispatchData.gws[2] = output.Batch().v * (CeilDiv(output.Feature().v, 8));
- return runInfo;
+ return dispatchData;
}
-JitConstants PoolingKernelGPUByxfPaddingOpt::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKernelGPUByxfPaddingOpt::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
protected:
bool Validate(const Params&, const optional_params&) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
DispatchData SetDefault(const pooling_params& params) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
}
PoolingKernelBase::DispatchData PoolingKerneGPU_fs_b_yx_fsv32::SetDefault(const pooling_params& params) const {
- DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+ DispatchData dispatchData = PoolingKernelBase::SetDefault(params);
- runInfo.gws0 = params.output.X().v; // X output blocks
- runInfo.gws1 = params.output.Y().v; // Y output clocks
+ dispatchData.gws[0] = params.output.X().v; // X output blocks
+    dispatchData.gws[1] = params.output.Y().v;  // Y output blocks
// in fs_b_yx_fsv32 format we will process 2 features per work item, so reads/writes are done in full writes for
// fp16
- runInfo.gws2 = RoundUp(params.output.Feature().v, 32) * params.output.Batch().v / 2;
+ dispatchData.gws[2] = RoundUp(params.output.Feature().v, 32) * params.output.Batch().v / 2;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 16;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 16;
- return runInfo;
+ return dispatchData;
}
bool PoolingKerneGPU_fs_b_yx_fsv32::Validate(const Params& p, const optional_params& o) const {
return true;
}
-JitConstants PoolingKerneGPU_fs_b_yx_fsv32::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKerneGPU_fs_b_yx_fsv32::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
auto pp = static_cast<const pooling_params&>(params);
// Heurestic needed for very big pool size.
protected:
bool Validate(const Params& p, const optional_params& o) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
FusedOpType::SCALE,
return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
}
-JitConstants PoolingKernelGPUInt8Ref::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- JitConstants jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKernelGPUInt8Ref::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ JitConstants jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
bool Validate(const Params&, const optional_params&) const override;
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE,
FusedOpType::SCALE,
return k;
}
-JitConstants PoolingKernelGPURef::GetJitConstants(const pooling_params& params, DispatchData kd) const {
- auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+JitConstants PoolingKernelGPURef::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const {
+ auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData);
jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
}
protected:
- JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override;
};
} // namespace kernel_selector
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
PyramidROIAlignKernelBase::DispatchData PyramidROIAlignKernelBase::SetDefault(const PyramidROIAlign_params& params) const {
- DispatchData kd;
-
- kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
- std::vector<size_t> global;
- global = {1, 1, 1};
-
- const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ DispatchData dispatchData;
+ dispatchData.gws = {1, 1, 1};
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
+ return dispatchData;
}
KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params,
const auto& prim_params =
static_cast<const PyramidROIAlign_params&>(params);
- auto run_info = SetDefault(prim_params);
+ auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<PyramidROIAlign_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto& kernel = k_data.kernels[0];
FillCLKernelData(kernel,
- run_info,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
PyramidROIAlignKernelBase::DispatchData PyramidROIAlignKernelRef::SetDefault(const PyramidROIAlign_params& params) const {
- auto dispatch = PyramidROIAlignKernelBase::SetDefault(params);
+ auto dispatchData = PyramidROIAlignKernelBase::SetDefault(params);
- std::vector<size_t> global = {
+ dispatchData.gws = {
params.output.X().v * params.output.Y().v,
params.output.Feature().v,
params.output.Batch().v };
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- dispatch.gws0 = global[0];
- dispatch.gws1 = global[1];
- dispatch.gws2 = global[2];
-
- dispatch.lws0 = local[0];
- dispatch.lws1 = local[1];
- dispatch.lws2 = local[2];
-
- return dispatch;
+ return dispatchData;
}
KernelsData PyramidROIAlignKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return true;
}
-JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const {
+JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
if (params.packed_binary_output) {
jit.AddConstant(MakeJitConstant("LEVELS", static_cast<float>(params.levels)));
- jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0));
- jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1));
- jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2));
+ jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0]));
+ jit.AddConstant(MakeJitConstant("LWS_1", dispatchData.lws[1]));
+ jit.AddConstant(MakeJitConstant("LWS_2", dispatchData.lws[2]));
return jit;
}
return {};
}
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
- auto cldnn_jit = GetJitConstants(newParams, runInfo);
+ auto cldnn_jit = GetJitConstants(newParams, dispatchData);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc(static_cast<int>(newParams.inputs.size()), false, false);
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
protected:
- virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const;
+ virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const;
virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0;
};
} // namespace kernel_selector
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// See the License for the specific language governing permissions and
// limitations under the License.
-
-#include <iostream>
#include "quantize_kernel_ref.h"
#include "kernel_selector_utils.h"
#include <string>
}
CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
auto output = params.output;
if (output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) {
- runInfo.gws0 = output.Batch().v;
- runInfo.gws1 = Align(output.Feature().v, sub_group_size);
- runInfo.gws2 = output.Y().v * output.X().v * output.Z().v;
+ dispatchData.gws[0] = output.Batch().v;
+ dispatchData.gws[1] = Align(output.Feature().v, sub_group_size);
+ dispatchData.gws[2] = output.Y().v * output.X().v * output.Z().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = sub_group_size;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
} else {
- runInfo.gws0 = output.Batch().v;
- runInfo.gws1 = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v;
- runInfo.gws2 = Align(output.X().v * output.Y().v * output.Z().v, 16);
+ dispatchData.gws[0] = output.Batch().v;
+ dispatchData.gws[1] = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v;
+ dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v, 16);
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 16;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 16;
}
- runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
- return runInfo;
+ return dispatchData;
}
-JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(params, runInfo);
+JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) {
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
}
QuantizeKernelRef() : QuantizeKernelBase("quantize_gpu_ref") {}
virtual ~QuantizeKernelRef() {}
- JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override;
+ JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override;
CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
auto output = params.output;
if (output.GetLayout() == DataLayout::b_fs_yx_fsv16) {
- runInfo.gws0 = output.Y().v * output.X().v;
- runInfo.gws1 = Align(output.Feature().v, sub_group_size);
- runInfo.gws2 = output.Batch().v;
+ dispatchData.gws[0] = output.Y().v * output.X().v;
+ dispatchData.gws[1] = Align(output.Feature().v, sub_group_size);
+ dispatchData.gws[2] = output.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = sub_group_size;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
} else {
- auto global = GetTensorFriendlyWorkGroups(output);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
+ dispatchData.gws = GetTensorFriendlyWorkGroups(output);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
}
- runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
- return runInfo;
+ return dispatchData;
}
-JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const {
- JitConstants jit = Parent::GetJitConstants(params, runInfo);
+JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const {
+ JitConstants jit = Parent::GetJitConstants(params, dispatchData);
if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16) {
jit.AddConstant(MakeJitConstant("GWS_BATCH", 2));
QuantizeKernelScaleShift() : QuantizeKernelBase("quantize_gpu_scale_shift_opt") {}
virtual ~QuantizeKernelScaleShift() {}
- JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override;
+ JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override;
CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override;
bool Validate(const Params& p, const optional_params& o) const override;
ParamsKey GetSupportedKey() const override;
}
CommonDispatchData ReduceKernel_b_fs_yx_fsv16::SetDefault(const reduce_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
auto in_dims = calc_in_dims(params);
- std::vector<size_t> global = {16,
- CeilDiv(in_dims[3].v, calc_read_offset(params)) * in_dims[2].v, // X, Y
- CeilDiv(in_dims[1].v, SIMD) * in_dims[0].v}; // F, B
+ dispatchData.gws = { 16,
+ CeilDiv(in_dims[3].v, calc_read_offset(params)) * in_dims[2].v, // X, Y
+ CeilDiv(in_dims[1].v, SIMD) * in_dims[0].v }; // F, B
+ dispatchData.lws = { SIMD, 1, 1 };
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = SIMD;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
-
- return runInfo;
+ return dispatchData;
}
JitConstants ReduceKernel_b_fs_yx_fsv16::GetJitConstants(const reduce_params& params) const {
}
const reduce_params& params = static_cast<const reduce_params&>(p);
- DispatchData runInfo = SetDefault(params, options);
+ DispatchData dispatchData = SetDefault(params, options);
KernelData kd = KernelData::Default<reduce_params>(params);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
- runInfo,
+ dispatchData,
params.engineInfo,
kernelName,
jit,
}
CommonDispatchData ReduceKernelRef::SetDefault(const reduce_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = {params.output.X().v * params.output.Y().v,
- params.output.Z().v * params.output.W().v,
- params.output.Batch().v * params.output.Feature().v};
+ dispatchData.gws = { params.output.X().v * params.output.Y().v,
+ params.output.Z().v * params.output.W().v,
+ params.output.Batch().v * params.output.Feature().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants ReduceKernelRef::GetJitConstants(const reduce_params& params) const {
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
RegionYoloKernelRef::DispatchData SetDefault(const region_yolo_params& params) {
- RegionYoloKernelRef::DispatchData kd;
-
- kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16);
+ RegionYoloKernelRef::DispatchData dispatchData;
const auto& input = params.inputs[0];
- std::vector<size_t> global;
if (input.GetLayout() == DataLayout::bfyx) {
- global = {input.X().v * input.Y().v, 1, 1};
+ dispatchData.gws = {input.X().v * input.Y().v, 1, 1};
} else {
- global = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v};
+ dispatchData.gws = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v};
}
- // Determine global work sizes.
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- return kd;
+ return dispatchData;
}
KernelsData RegionYoloKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
assert(params.GetType() == KernelType::REGION_YOLO);
const region_yolo_params& orgParams = static_cast<const region_yolo_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<region_yolo_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ReorderFromWinograd2x3Kernel::DispatchData ReorderFromWinograd2x3Kernel::SetDefault(
const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
constexpr auto output_tile_width = 2; // by definition of F(2,3)
const auto& input = params.inputs[0];
const auto& output = params.output;
- kd.gws0 = static_cast<size_t>(output.Feature().v * output.Batch().v);
- kd.gws1 = static_cast<size_t>(output.X().v / output_tile_width);
- kd.gws2 = static_cast<size_t>(output.Y().v);
+ dispatchData.gws[0] = static_cast<size_t>(output.Feature().v * output.Batch().v);
+ dispatchData.gws[1] = static_cast<size_t>(output.X().v / output_tile_width);
+ dispatchData.gws[2] = static_cast<size_t>(output.Y().v);
- kd.lws0 = input.Feature().v > 32 ? 32 : static_cast<size_t>(input.Feature().v);
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = input.Feature().v > 32 ? 32 : static_cast<size_t>(input.Feature().v);
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
KernelsData ReorderFromWinograd2x3Kernel::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_weights_params& params) const {
const auto& out = params.output;
- DispatchData kd;
+ DispatchData dispatchData;
- std::vector<size_t> global(3);
+ dispatchData.gws = { out.G().v * out.OFM().v, out.IFM().v, out.X().v * out.Y().v * out.Z().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- global = {out.G().v * out.OFM().v, out.IFM().v, out.X().v * out.Y().v * out.Z().v};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
auto& input = params.inputs[0];
DataTensor input_tensor = input;
input_tensor = DataTensor(input_sizes, input.GetDType(), DataLayout::image_2d_rgba);
}
- auto global = GetTensorFriendlyWorkGroups(input_tensor);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.gws = GetTensorFriendlyWorkGroups(input_tensor);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) {
std::vector<size_t> sizes = { 32, 16, 8, 4 };
for (auto& s : sizes) {
- if (kd.gws2 % s == 0) {
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = s;
+ if (dispatchData.gws[2] % s == 0) {
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = s;
break;
}
}
}
if (params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 && params.inputs[0].Feature().v % 16 == 0) {
- kd.lws0 = 1;
- kd.lws1 = 16;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 16;
+ dispatchData.lws[2] = 1;
}
- return kd;
+ return dispatchData;
}
KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params& params, const optional_params& options, float estimated_time) const {
KernelData kd = KernelData::Default<reorder_weights_params>(params);
reorder_weights_params& newParams = *static_cast<reorder_weights_params*>(kd.params.get());
- DispatchData runInfo;
+ DispatchData dispatchData;
- runInfo = SetDefault(newParams);
+ dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments = GetArgsDesc(1, false, false);
KernelData kd = KernelData::Default<reorder_params>(params);
reorder_params& newParams = *static_cast<reorder_params*>(kd.params.get());
- DispatchData runInfo;
-
- runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments = GetArgsDesc(1, false, false);
if (newParams.mode == MeanSubtractMode::IN_BUFFER) {
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ReorderKernelBinary::DispatchData ReorderKernelBinary::SetDefault(const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.inputs[0];
- std::vector<size_t> global{input.Batch().v, CeilDiv(input.Feature().v, 32), input.Y().v * input.X().v};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { input.Batch().v, CeilDiv(input.Feature().v, 32), input.Y().v * input.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData ReorderKernelBinary::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ReorderKernelFastBatch1::DispatchData ReorderKernelFastBatch1::SetDefault(const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& output = params.output;
unsigned int gws = (unsigned int)output.LogicalSize();
- kd.gws0 = Align(gws, 32);
- kd.gws1 = 1;
- kd.gws2 = 1;
+ dispatchData.gws[0] = Align(gws, 32);
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- kd.lws0 = 32;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
KernelsData ReorderKernelFastBatch1::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ReorderKernelBase::DispatchData ReorderKernel_fs_b_yx_fsv32_to_bfyx::SetDefault(const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
auto x_aligned = Align(params.output.X().v, x_block_align);
- kd.gws0 = params.output.Batch().v;
- kd.gws1 = Align(params.output.Feature().v, fsv);
- kd.gws2 = params.output.Y().v * x_aligned / GetOptimalSize(x_aligned, optimal_x_sizes);
+ dispatchData.gws[0] = params.output.Batch().v;
+ dispatchData.gws[1] = Align(params.output.Feature().v, fsv);
+ dispatchData.gws[2] = params.output.Y().v * x_aligned / GetOptimalSize(x_aligned, optimal_x_sizes);
- kd.lws0 = 1;
- kd.lws1 = GetOptimalSize(kd.gws1, optimal_feature_sizes);
- kd.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = GetOptimalSize(dispatchData.gws[1], optimal_feature_sizes);
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
KernelsData ReorderKernel_fs_b_yx_fsv32_to_bfyx::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ReorderKernelBase::DispatchData ReorderKernel_to_yxfb_batched::SetDefault(const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.inputs[0];
unsigned int gws = (unsigned int)input.LogicalSize();
- kd.gws0 = Align(gws, 8 * input.Batch().v) / input.Batch().v;
- kd.gws1 = 1;
- kd.gws2 = 1;
+ dispatchData.gws[0] = Align(gws, 8 * input.Batch().v) / input.Batch().v;
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- kd.lws0 = 8;
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = 8;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
KernelsData ReorderKernel_to_yxfb_batched::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ReorderToWinograd2x3Kernel::DispatchData ReorderToWinograd2x3Kernel::SetDefault(const reorder_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.inputs[0];
const auto& output = params.output;
- kd.gws0 = static_cast<size_t>(input.Feature().v * input.Batch().v);
- kd.gws1 = static_cast<size_t>(params.winograd_nr_tiles_x);
- kd.gws2 = static_cast<size_t>(output.Y().v);
+ dispatchData.gws[0] = static_cast<size_t>(input.Feature().v * input.Batch().v);
+ dispatchData.gws[1] = static_cast<size_t>(params.winograd_nr_tiles_x);
+ dispatchData.gws[2] = static_cast<size_t>(output.Y().v);
- kd.lws0 = input.Feature().v > 32 ? 32 : static_cast<size_t>(input.Feature().v);
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[0] = input.Feature().v > 32 ? 32 : static_cast<size_t>(input.Feature().v);
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
KernelsData ReorderToWinograd2x3Kernel::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
const reorder_weights_params& params) const {
const auto& out = params.output;
- DispatchData kd;
+ DispatchData dispatchData;
- std::vector<size_t> global = {out.OFM().v, CeilDiv(out.IFM().v, 32), out.X().v * out.Y().v};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { out.OFM().v, CeilDiv(out.IFM().v, 32), out.X().v * out.Y().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData ReorderWeightsBinaryKernel::GetKernelsData(const Params& params, const optional_params& options) const {
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return k;
}
-ReorderWeightsImage_fyx_b_Kernel::DispatchData ReorderWeightsImage_fyx_b_Kernel::SetDefault(
- const reorder_weights_params& params) const {
+ReorderWeightsImage_fyx_b_Kernel::DispatchData ReorderWeightsImage_fyx_b_Kernel::SetDefault(const reorder_weights_params& params) const {
const auto& out = params.output;
- DispatchData kd;
+ DispatchData dispatchData;
- std::vector<size_t> global(3);
+ dispatchData.gws = { out.OFM().v, Align(out.X().v * out.Y().v * out.IFM().v, 4) / 4, 1 };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- global = {out.OFM().v, Align(out.X().v * out.Y().v * out.IFM().v, 4) / 4, 1};
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData ReorderWeightsImage_fyx_b_Kernel::GetKernelsData(const Params& params,
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ReorderWeightsImageWinograd6x3Kernel::DispatchData ReorderWeightsImageWinograd6x3Kernel::SetDefault(
const reorder_weights_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.input;
- kd.gws0 = 1;
- kd.gws1 = 3;
- kd.gws2 = static_cast<size_t>(input.IFM().v * input.OFM().v);
+ dispatchData.gws[0] = 1;
+ dispatchData.gws[1] = 3;
+ dispatchData.gws[2] = static_cast<size_t>(input.IFM().v * input.OFM().v);
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 32;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 32;
- return kd;
+ return dispatchData;
}
KernelsData ReorderWeightsImageWinograd6x3Kernel::GetKernelsData(const Params& params,
ReorderWeightsOpt::DispatchData ReorderWeightsOpt::SetDefault(
const reorder_weights_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& output = params.output;
const auto output_layout = output.GetLayout();
const auto ifm_block = (osv_first) ? ifm_block_supported ? GetOptimalSize(output.IFM().v, preferred_sizes) : 1
: subgroup_size;
- std::vector<size_t> global;
if (osv_first) {
- global = {output.G().v * (output.IFM().v / ifm_block), output.Z().v * output.Y().v * output.X().v, Align(output.OFM().v, ofm_block)};
+ dispatchData.gws = { output.G().v * (output.IFM().v / ifm_block),
+ output.Z().v * output.Y().v * output.X().v,
+ Align(output.OFM().v, ofm_block) };
} else {
- global = {output.G().v * (output.OFM().v / ofm_block), output.Z().v * output.Y().v * output.X().v, Align(output.IFM().v, ifm_block)};
+ dispatchData.gws = { output.G().v * (output.OFM().v / ofm_block),
+ output.Z().v * output.Y().v * output.X().v,
+ Align(output.IFM().v, ifm_block) };
}
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
+ dispatchData.lws = { 1, 1, 16 };
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 16;
-
- return kd;
+ return dispatchData;
}
JitConstants ReorderWeightsOpt::GetJitConstants(const reorder_weights_params& params) const {
const auto& p = static_cast<const reorder_weights_params&>(params);
const auto& input = p.input;
const auto& output = p.output;
-
+
if (input.GroupedLayout() != output.GroupedLayout()) {
return false;
}
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ReorderWeightsWinograd2x3Kernel::DispatchData ReorderWeightsWinograd2x3Kernel::SetDefault(
const reorder_weights_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.input;
- kd.gws0 = 1;
- kd.gws1 = 3;
- kd.gws2 = static_cast<size_t>(input.IFM().v * input.OFM().v);
+ dispatchData.gws[0] = 1;
+ dispatchData.gws[1] = 3;
+ dispatchData.gws[2] = static_cast<size_t>(input.IFM().v * input.OFM().v);
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 32;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 32;
- return kd;
+ return dispatchData;
}
KernelsData ReorderWeightsWinograd2x3Kernel::GetKernelsData(const Params& params,
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
ReorderWeightsWinograd6x3Kernel::DispatchData ReorderWeightsWinograd6x3Kernel::SetDefault(
const reorder_weights_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& input = params.input;
- kd.gws0 = 1;
- kd.gws1 = 3;
- kd.gws2 = static_cast<size_t>(input.IFM().v * input.OFM().v);
+ dispatchData.gws[0] = 1;
+ dispatchData.gws[1] = 3;
+ dispatchData.gws[2] = static_cast<size_t>(input.IFM().v * input.OFM().v);
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = 32;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 32;
- return kd;
+ return dispatchData;
}
KernelsData ReorderWeightsWinograd6x3Kernel::GetKernelsData(const Params& params,
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
return jit;
}
ReorgYoloKernelRef::DispatchData SetDefault(const reorg_yolo_params& params) {
- ReorgYoloKernelRef::DispatchData kd;
-
- kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16);
+ ReorgYoloKernelRef::DispatchData dispatchData;
const auto& input = params.inputs[0];
- std::vector<size_t> global;
if (input.GetLayout() == DataLayout::bfyx) {
- global = {input.X().v, input.Y().v, input.Feature().v};
+ dispatchData.gws = {input.X().v, input.Y().v, input.Feature().v};
} else {
- global = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v};
+ dispatchData.gws = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v};
}
- // Determine global work sizes.
- kd.gws0 = global[0];
- kd.gws1 = global[1];
- kd.gws2 = global[2];
-
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- return kd;
+ return dispatchData;
}
KernelsData ReorgYoloKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
assert(params.GetType() == KernelType::REORG_YOLO);
const reorg_yolo_params& orgParams = static_cast<const reorg_yolo_params&>(params);
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<reorg_yolo_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
}
ResampleKernelBase::DispatchData ResampleKernelBase::SetDefault(const kernel_selector::resample_params &arg) const {
- DispatchData runInfo;
- std::vector<size_t> global;
- std::vector<size_t> local;
+ DispatchData dispatchData;
const auto& out = arg.output;
if (arg.resampleType == ResampleType::NEAREST_NEIGHBOR)
- global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
+ dispatchData.gws = { out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v };
else if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX)
- global = {Align(out.X().v, 32), out.Y().v, out.Batch().v};
+ dispatchData.gws = { Align(out.X().v, 32), out.Y().v, out.Batch().v };
else if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP)
- global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v};
+ dispatchData.gws = { out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v };
else
- global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
+ dispatchData.gws = { out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v };
- local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo);
if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) {
- local[0] = 32;
- local[1] = 1;
- local[2] = 1;
+ dispatchData.lws[0] = 32;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
+ dispatchData.efficiency = FORCE_PRIORITY_7;
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- runInfo.efficiency = FORCE_PRIORITY_7;
- runInfo.fp16UnitUsed = out.GetDType() == Datatype::F16;
-
- return runInfo;
+ return dispatchData;
}
bool ResampleKernelBase::Validate(const Params& p, const optional_params& o) const {
KernelData kd = KernelData::Default<resample_params>(params);
resample_params& newParams = *static_cast<resample_params*>(kd.params.get());
- auto runInfo = SetDefault(newParams);
+ auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
return {kd};
}
}
ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_selector::resample_params &arg) const {
- DispatchData runInfo;
+ DispatchData dispatchData;
const auto& out = arg.output;
- runInfo.gws0 = CeilDiv(out.X().v, GetOptimalBlockSize(arg)) * out.Y().v;
- runInfo.gws1 = Align(out.Feature().v, sub_group_size);
- runInfo.gws2 = arg.output.Batch().v;
+ dispatchData.gws[0] = CeilDiv(out.X().v, GetOptimalBlockSize(arg)) * out.Y().v;
+ dispatchData.gws[1] = Align(out.Feature().v, sub_group_size);
+ dispatchData.gws[2] = arg.output.Batch().v;
- runInfo.lws0 = 1;
- runInfo.lws1 = sub_group_size;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = sub_group_size;
+ dispatchData.lws[2] = 1;
- runInfo.efficiency = FORCE_PRIORITY_3;
- runInfo.fp16UnitUsed = out.GetDType() == Datatype::F16;
+ dispatchData.efficiency = FORCE_PRIORITY_3;
- return runInfo;
+ return dispatchData;
}
bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) const {
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
ResampleKernelBase::DispatchData ResampleKernelRef::SetDefault(const resample_params& arg) const {
- auto dispatch = Parent::SetDefault(arg);
+ auto dispatchData = Parent::SetDefault(arg);
if (use_packing(arg)) {
auto pack = packing_factor(arg);
- std::vector<size_t> global;
- std::vector<size_t> local;
-
- global = { arg.output.X().v, arg.output.Y().v * arg.output.Z().v, CeilDiv(arg.output.Feature().v, pack) * arg.output.Batch().v };
- local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
-
- dispatch.gws0 = global[0];
- dispatch.gws1 = global[1];
- dispatch.gws2 = global[2];
-
- dispatch.lws0 = local[0];
- dispatch.lws1 = local[1];
- dispatch.lws2 = local[2];
+ dispatchData.gws = { arg.output.X().v, arg.output.Y().v * arg.output.Z().v, CeilDiv(arg.output.Feature().v, pack) * arg.output.Batch().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo);
}
- return dispatch;
+ return dispatchData;
}
} // namespace kernel_selector
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
CommonDispatchData ReverseSequenceKernelRef::SetDefault(const reverse_sequence_params& params,
const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = {params.output.Batch().v,
- params.output.Feature().v,
- params.output.Y().v * params.output.X().v};
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Y().v * params.output.X().v };
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants ReverseSequenceKernelRef::GetJitConstants(const reverse_sequence_params& params) const {
assert(params.GetType() == KernelType::REVERSE_SEQUENCE);
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
namespace kernel_selector {
static ROIPoolingKernelBase::DispatchData SetDefault(const roi_pooling_params& params) {
- ROIPoolingKernelBase::DispatchData kd;
-
- kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16);
+ ROIPoolingKernelBase::DispatchData dispatchData;
// Determine global work sizes.
- kd.gws0 = params.output.LogicalSize();
- kd.gws1 = 1;
- kd.gws2 = 1;
+ dispatchData.gws[0] = params.output.LogicalSize();
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
// Find largest positive local work size that is divider for global work size.
- kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
- while (kd.gws0 % kd.lws0 != 0) {
- --kd.lws0;
+ dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
+ --dispatchData.lws[0];
}
- kd.lws1 = 1;
- kd.lws2 = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- return kd;
+ return dispatchData;
}
JitConstants ROIPoolingKernelBase::GetJitConstants(const roi_pooling_params& rp) const {
return {};
}
- DispatchData runInfo = SetDefault(orgParams);
+ DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<roi_pooling_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
if (orgParams.mode == PoolType::DEFORMABLE_BILINEAR && !orgParams.no_trans)
kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2});
std::string order_str = order[0];
for (size_t i = 1; i < order.size(); i++)
order_str += ", " + order[i];
-
+
return order_str;
}
std::string FYX_indices_size = "(INPUT1_FEATURE_NUM * INPUT1_SIZE_Y * INPUT1_SIZE_X)";
std::string YX_indices_size = "(INPUT1_SIZE_Y * INPUT1_SIZE_X)";
std::string X_indices_size = "(INPUT1_SIZE_X)";
-
+
// Shift indices of ScatterUpdate updates input related to Indices dims
for (size_t i = default_order.size() - 1; i > (axis + indices_non_empty_dims - 1); i--)
default_order[i] = default_order[i - indices_non_empty_dims + 1];
}
CommonDispatchData ScatterUpdateKernelRef::SetDefault(const scatter_update_params& params, const optional_params&, bool is_second) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
const auto& output = params.output;
- std::vector<size_t> global(3);
const size_t indices_size = params.inputs[1].LogicalSize();
switch (params.inputs[0].GetLayout()) {
case DataLayout::bfyx:
- global = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v};
+ dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v};
if (is_second) {
if (params.axis == ScatterUpdateAxis::BATCH)
- global[2] = indices_size * output.Feature().v;
+ dispatchData.gws[2] = indices_size * output.Feature().v;
else if (params.axis == ScatterUpdateAxis::FEATURE)
- global[2] = indices_size * output.Batch().v;
+ dispatchData.gws[2] = indices_size * output.Batch().v;
else if (params.axis == ScatterUpdateAxis::Y)
- global[1] = indices_size;
+ dispatchData.gws[1] = indices_size;
else
- global[0] = indices_size;
+ dispatchData.gws[0] = indices_size;
}
break;
case DataLayout::bfzyx:
- global = {output.X().v * output.Y().v, output.Z().v, output.Feature().v * output.Batch().v};
+ dispatchData.gws = {output.X().v * output.Y().v, output.Z().v, output.Feature().v * output.Batch().v};
if (is_second) {
if (params.axis == ScatterUpdateAxis::BATCH)
- global[2] = indices_size * output.Feature().v;
+ dispatchData.gws[2] = indices_size * output.Feature().v;
else if (params.axis == ScatterUpdateAxis::FEATURE)
- global[2] = indices_size * output.Batch().v;
+ dispatchData.gws[2] = indices_size * output.Batch().v;
else if (params.axis == ScatterUpdateAxis::Z)
- global[1] = indices_size;
+ dispatchData.gws[1] = indices_size;
else if (params.axis == ScatterUpdateAxis::Y)
- global[0] = indices_size * output.X().v;
+ dispatchData.gws[0] = indices_size * output.X().v;
else
- global[0] = indices_size * output.Y().v;
+ dispatchData.gws[0] = indices_size * output.Y().v;
}
break;
case DataLayout::bfwzyx:
- global = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
+ dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
if (is_second) {
if (params.axis == ScatterUpdateAxis::BATCH)
- global[2] = indices_size * output.Feature().v;
+ dispatchData.gws[2] = indices_size * output.Feature().v;
else if (params.axis == ScatterUpdateAxis::FEATURE)
- global[2] = indices_size * output.Batch().v;
+ dispatchData.gws[2] = indices_size * output.Batch().v;
else if (params.axis == ScatterUpdateAxis::Z)
- global[1] = indices_size * output.W().v;
+ dispatchData.gws[1] = indices_size * output.W().v;
else if (params.axis == ScatterUpdateAxis::W)
- global[1] = indices_size * output.Z().v;
+ dispatchData.gws[1] = indices_size * output.Z().v;
else if (params.axis == ScatterUpdateAxis::Y)
- global[0] = indices_size * output.X().v;
+ dispatchData.gws[0] = indices_size * output.X().v;
else
- global[0] = indices_size * output.Y().v;
+ dispatchData.gws[0] = indices_size * output.Y().v;
}
break;
default: break;
}
-
- std::vector<size_t> local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
- runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- return runInfo;
+ return dispatchData;
}
static std::string GetOutputIndexOnAxis(const scatter_update_params& params, size_t axis) {
const scatter_update_params& orgParams = static_cast<const scatter_update_params&>(params);
const size_t indices_size = orgParams.inputs[1].LogicalSize();
int start_with_iteration = 0;
-
+
// if dim of output along axis is equal to logical size of indices, we miss copying kernel
if (orgParams.inputs[0].Extract(orgParams.inputs[0].GetLayout(), Tensor::DataChannelName(orgParams.axis), orgParams.inputs[0].GetDims()).v == indices_size) {
start_with_iteration = 1;
auto cldnn_jit = GetJitConstants(newParams);
for (int i = start_with_iteration; i < 2; i++) {
- auto runInfo = SetDefault(newParams, options, (i == 1));
+ auto dispatchData = SetDefault(newParams, options, (i == 1));
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
if (i == 1){
clKernelData& kernel = kd.kernels[i - start_with_iteration];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3, GetFusedPrimitiveInputsCount(params));
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3, GetFusedPrimitiveInputsCount(params));
}
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
-
+
return {kd};
}
} // namespace kernel_selector
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
SelectKernelBase::DispatchData SelectKernelBase::SetDefault(const select_params& params) const {
- DispatchData kd;
+ DispatchData dispatchData;
const auto& out = params.output;
gws.push_back(1U);
}
- kd.gws0 = gws[0];
- kd.gws1 = gws[1];
- kd.gws2 = gws[2] * gws[3];
+ dispatchData.gws[0] = gws[0];
+ dispatchData.gws[1] = gws[1];
+ dispatchData.gws[2] = gws[2] * gws[3];
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes({kd.gws0, kd.gws1, kd.gws2}, params.engineInfo);
- kd.lws0 = local[0];
- kd.lws1 = local[1];
- kd.lws2 = local[2];
-
- return kd;
+ return dispatchData;
}
KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
- DispatchData runInfo = SetDefault(newParams);
+ DispatchData dispatchData = SetDefault(newParams);
auto& kernel = kd.kernels[0];
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params,
const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = {params.output.Batch().v,
- params.output.Feature().v,
- params.output.Y().v * params.output.X().v};
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Y().v * params.output.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_params& params) const {
assert(params.GetType() == KernelType::SHUFFLE_CHANNELS);
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
}
-JitConstants SoftmaxItemsClassKernelBase::GetJitConstants(const softmax_params& params, DispatchData kd) const {
- auto jit = SoftmaxKernelBase::GetJitConstants(params, kd);
+JitConstants SoftmaxItemsClassKernelBase::GetJitConstants(const softmax_params& params, DispatchData dispatchData) const {
+ auto jit = SoftmaxKernelBase::GetJitConstants(params, dispatchData);
switch (params.dim) {
case SoftmaxDim::X:
virtual ~SoftmaxItemsClassKernelBase() {}
protected:
- JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const softmax_params& params, DispatchData dispatchData) const override;
static ParamsKey GetDefaultSupportedKey();
static std::vector<size_t> GetSoftmaxDimGlobalSizes(SoftmaxDim dim, const DataTensor& output);
};
namespace kernel_selector {
JitConstants SoftmaxKernelBase::GetJitConstants(const softmax_params& params,
- SoftmaxKernelBase::DispatchData kd) const {
+ SoftmaxKernelBase::DispatchData dispatchData) const {
JitConstants mem_consts = MakeBaseParamsJitConstants(params);
mem_consts.AddConstants({MakeJitConstant("ALONG_" + toString(params.dim), "")});
mem_consts.AddConstants({
- MakeJitConstant("ITEMS_NUM", kd.itemsNum),
- MakeJitConstant("LWS", kd.lws0),
- MakeJitConstant("GWS", kd.gws0),
- MakeJitConstant("DATA_SETS_COUNT", kd.dataSetsCount),
- MakeJitConstant("DATA_SET_SIZE", kd.dataSetSize),
- MakeJitConstant("LEFTOVERS", kd.leftovers),
+ MakeJitConstant("ITEMS_NUM", dispatchData.itemsNum),
+ MakeJitConstant("LWS", dispatchData.lws[0]),
+ MakeJitConstant("GWS", dispatchData.gws[0]),
+ MakeJitConstant("DATA_SETS_COUNT", dispatchData.dataSetsCount),
+ MakeJitConstant("DATA_SET_SIZE", dispatchData.dataSetSize),
+ MakeJitConstant("LEFTOVERS", dispatchData.leftovers),
});
return mem_consts;
}
-SoftmaxKernelBase::DispatchData SoftmaxKernelBase::SetDefault(const softmax_params& params,
+SoftmaxKernelBase::DispatchData SoftmaxKernelBase::SetDefault(const softmax_params&,
const optional_params&) const {
- DispatchData runInfo;
+ DispatchData dispatchData;
- runInfo.gws0 = 1;
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = 1;
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
- runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
- runInfo.leftovers = 0;
- runInfo.itemsNum = 0;
- runInfo.normIndex = 0;
- runInfo.dataSetsCount = 0;
- runInfo.dataSetSize = 0;
+ dispatchData.leftovers = 0;
+ dispatchData.itemsNum = 0;
+ dispatchData.normIndex = 0;
+ dispatchData.dataSetsCount = 0;
+ dispatchData.dataSetSize = 0;
- return runInfo;
+ return dispatchData;
}
bool SoftmaxKernelBase::Validate(const Params& p, const optional_params& o) const {
const softmax_params& orgParams = static_cast<const softmax_params&>(params);
KernelData kd = KernelData::Default<softmax_params>(params);
- auto runInfo = SetDefault(orgParams, options);
- auto cldnn_jit = GetJitConstants(orgParams, runInfo);
+ auto dispatchData = SetDefault(orgParams, options);
+ auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
- kd.estimatedTime = runInfo.efficiency;
+ kd.estimatedTime = dispatchData.efficiency;
return {kd};
}
const optional_params& options) const {
const auto& input = params.inputs[0];
- DispatchData kd = Parent::SetDefault(params, options);
+ DispatchData dispatchData = Parent::SetDefault(params, options);
auto flatten_input = input.FlattenFeatureAndSpatials();
- kd.dataSetSize = flatten_input.Feature().v;
- kd.dataSetsCount = input.Batch().v;
+ dispatchData.dataSetSize = flatten_input.Feature().v;
+ dispatchData.dataSetsCount = input.Batch().v;
- return kd;
+ return dispatchData;
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
protected:
virtual bool Validate(const Params&, const optional_params&) const;
- virtual JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const;
+ virtual JitConstants GetJitConstants(const softmax_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& optParams) const;
};
bool Validate(const Params&, const optional_params&) const override;
DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
SoftmaxKernel_bf::Parent::DispatchData SoftmaxKernel_bf::SetDefault(const softmax_params& params,
const optional_params& optParams) const {
- auto kd = Parent::SetDefault(params, optParams);
+ auto dispatchData = Parent::SetDefault(params, optParams);
// start with 1 thread per data set
- kd.gws0 = 1;
- kd.gws1 = kd.dataSetsCount;
- kd.itemsNum = kd.dataSetSize;
+ dispatchData.gws[0] = 1;
+ dispatchData.gws[1] = dispatchData.dataSetsCount;
+ dispatchData.itemsNum = dispatchData.dataSetSize;
- kd.normIndex = 0;
+ dispatchData.normIndex = 0;
// We have two units of data per work item in current implementation.
- auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float));
+ auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
// Combining device execution and local memory restrictions to compute maximum possible LWS.
auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi);
- kd.lws0 = 1;
+ dispatchData.lws[0] = 1;
// Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory
// reads.
- while ((kd.itemsNum > 32 || kd.lws0 < kd.itemsNum) && (2 * kd.lws0 <= max_lws)) {
- kd.lws0 *= 2;
- kd.itemsNum /= 2;
+ while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) {
+ dispatchData.lws[0] *= 2;
+ dispatchData.itemsNum /= 2;
}
- assert((kd.itemsNum + 1) * kd.lws0 >= kd.dataSetSize && "More than 'lws0' items per batch remains! Lws too small?");
+ assert((dispatchData.itemsNum + 1) * dispatchData.lws[0] >= dispatchData.dataSetSize && "More than 'lws[0]' items per batch remains! Lws too small?");
- kd.gws0 = kd.lws0;
- kd.leftovers = kd.dataSetSize % kd.lws0;
+ dispatchData.gws[0] = dispatchData.lws[0];
+ dispatchData.leftovers = dispatchData.dataSetSize % dispatchData.lws[0];
- assert(kd.itemsNum > 0 && kd.lws0 && kd.gws0 > 0);
+ assert(dispatchData.itemsNum > 0 && dispatchData.lws[0] && dispatchData.gws[0] > 0);
- kd.efficiency = FORCE_PRIORITY_6;
- return kd;
+ dispatchData.efficiency = FORCE_PRIORITY_6;
+ return dispatchData;
}
KernelsData SoftmaxKernel_bf::GetKernelsData(const Params& params, const optional_params& optionalParams) const {
SoftmaxKernel_fb::Parent::DispatchData SoftmaxKernel_fb::SetDefault(const softmax_params& params,
const optional_params& optParams) const {
- auto kd = Parent::SetDefault(params, optParams);
+ auto dispatchData = Parent::SetDefault(params, optParams);
// start with 1 thread per data set
- kd.gws0 = kd.dataSetsCount;
- kd.gws1 = 1;
- kd.itemsNum = kd.dataSetSize;
+ dispatchData.gws[0] = dispatchData.dataSetsCount;
+ dispatchData.gws[1] = 1;
+ dispatchData.itemsNum = dispatchData.dataSetSize;
- kd.normIndex = 1;
+ dispatchData.normIndex = 1;
// We have two units of data per work item in current implementation.
- auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float));
+ auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType());
// Combining device execution and local memory restrictions to compute maximum possible LWS.
auto max_lws = static_cast<std::size_t>(
std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi));
- kd.lws0 = std::min(kd.dataSetsCount, max_lws);
+ dispatchData.lws[0] = std::min(dispatchData.dataSetsCount, max_lws);
// Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory
// reads.
- while ((kd.itemsNum > 32 || kd.lws0 < kd.itemsNum) && (2 * kd.lws0 <= max_lws)) {
- kd.lws0 *= 2;
- kd.itemsNum /= 2;
+ while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) {
+ dispatchData.lws[0] *= 2;
+ dispatchData.itemsNum /= 2;
}
- kd.gws0 = kd.lws0;
- kd.gws1 = 1;
- kd.leftovers = (kd.dataSetSize * kd.dataSetsCount) % kd.lws0;
+ dispatchData.gws[0] = dispatchData.lws[0];
+ dispatchData.gws[1] = 1;
+ dispatchData.leftovers = (dispatchData.dataSetSize * dispatchData.dataSetsCount) % dispatchData.lws[0];
- assert(kd.itemsNum > 0 && kd.lws0 && kd.gws0 > 0);
+ assert(dispatchData.itemsNum > 0 && dispatchData.lws[0] && dispatchData.gws[0] > 0);
- kd.efficiency = FORCE_PRIORITY_6;
- return kd;
+ dispatchData.efficiency = FORCE_PRIORITY_6;
+ return dispatchData;
}
bool kernel_selector::SoftmaxKernel_fb::Validate(const Params& params, const optional_params& o) const {
const auto& softmax_params = static_cast<const kernel_selector::softmax_params&>(params);
- auto kd = Parent::SetDefault(softmax_params, o);
- auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float));
+ auto local_mem_per_wi = 2 * BytesPerElement(softmax_params.inputs[0].GetDType());
auto max_lws = static_cast<std::size_t>(
std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi));
SoftmaxKerneItemsClassOptimized::Parent::DispatchData SoftmaxKerneItemsClassOptimized::SetDefault(
const softmax_params& params,
const optional_params& optParams) const {
- auto runInfo = Parent::SetDefault(params, optParams);
+ auto dispatchData = Parent::SetDefault(params, optParams);
auto& input = params.inputs[0];
break;
}
- runInfo.gws0 = global[0];
- runInfo.gws1 =
- global[1] * workitems_per_classes; // we multiply it by workitems_per_classes because we split computations of
- // one "full item classes output" into multiple workitems by "full item
- // classes output" i mean N outputs where N is number of item classes.
- runInfo.gws2 = global[2];
+ dispatchData.gws[0] = global[0];
+ dispatchData.gws[1] = global[1] * workitems_per_classes; // we multiply it by workitems_per_classes because we split computations of
+ // one "full item classes output" into multiple workitems by "full item
+ // classes output" i mean N outputs where N is number of item classes.
+ dispatchData.gws[2] = global[2];
- runInfo.lws0 = 1;
- runInfo.lws1 = workitems_per_classes;
- runInfo.lws2 = 1;
+ dispatchData.lws = { 1, workitems_per_classes, 1 };
- runInfo.leftovers = item_class_count % workitems_per_classes;
+ dispatchData.leftovers = item_class_count % workitems_per_classes;
if (item_class_count >= 32) {
- runInfo.efficiency = FORCE_PRIORITY_7;
+ dispatchData.efficiency = FORCE_PRIORITY_7;
} else {
- runInfo.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
- return runInfo;
+ return dispatchData;
}
-JitConstants SoftmaxKerneItemsClassOptimized::GetJitConstants(const softmax_params& params, DispatchData kd) const {
- auto jit = SoftmaxItemsClassKernelBase::GetJitConstants(params, kd);
+JitConstants SoftmaxKerneItemsClassOptimized::GetJitConstants(const softmax_params& params, DispatchData dispatchData) const {
+ auto jit = SoftmaxItemsClassKernelBase::GetJitConstants(params, dispatchData);
jit.AddConstant(MakeJitConstant("WORKITEMS_PER_CLASSES", workitems_per_classes));
jit.AddConstant(MakeJitConstant("HAS_DRIVER_PROBLEMS", params.engineInfo.bIMADSupport));
const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
ParamsKey GetSupportedKey() const override;
protected:
- JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const override;
+ JitConstants GetJitConstants(const softmax_params& params, DispatchData dispatchData) const override;
DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
SoftmaxKernelRef::Parent::DispatchData SoftmaxKernelRef::SetDefault(const softmax_params& params,
const optional_params& optParams) const {
- auto runInfo = Parent::SetDefault(params, optParams);
+ auto dispatchData = Parent::SetDefault(params, optParams);
- const auto global = GetSoftmaxDimGlobalSizes(params.dim, params.output);
+ dispatchData.gws = GetSoftmaxDimGlobalSizes(params.dim, params.output);
- assert(global.size() == 3);
+ assert(dispatchData.gws.size() == 3);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
+ dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- runInfo.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
-
- return runInfo;
+ return dispatchData;
}
KernelsData SoftmaxKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
CommonDispatchData SpaceToBatchKernelBase::SetDefault(const space_to_batch_params& params, const optional_params&) const {
const auto& out = params.output;
- CommonDispatchData runInfo;
- std::vector<size_t> global;
- std::vector<size_t> local;
-
+ CommonDispatchData dispatchData;
if (out.GetLayout() == DataLayout::b_fs_yx_fsv16 && out.Feature().v % 16 == 0) {
- global = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v };
- local = {1, 16, 1};
+ dispatchData.gws = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v };
+ dispatchData.lws = {1, 16, 1};
} else {
- global = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v };
- local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+ dispatchData.gws = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
}
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants SpaceToBatchKernelBase::GetJitConstants(const space_to_batch_params& params) const {
return {};
}
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
"", false, false, 1, GetFusedPrimitiveInputsCount(params));
kd.estimatedTime = estimatedTime;
CommonDispatchData SpaceToDepthKernelRef::SetDefault(const space_to_depth_params& params,
const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
- std::vector<size_t> global = {params.output.Batch().v,
- params.output.Feature().v,
- params.output.Z().v * params.output.Y().v * params.output.X().v};
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Z().v * params.output.Y().v * params.output.X().v };
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
- runInfo.gws0 = global[0];
- runInfo.gws1 = global[1];
- runInfo.gws2 = global[2];
-
- runInfo.lws0 = local[0];
- runInfo.lws1 = local[1];
- runInfo.lws2 = local[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants SpaceToDepthKernelRef::GetJitConstants(const space_to_depth_params& params) const {
return {};
}
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point,
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
// If the new_axis_mask is set, then begin, end, and stride are ignored
// and a new length 1 dimension is adding. Input data just copying to output
// TODO: remove data copying in case where only shape size changing
- std::vector<size_t> gws = {params.output.Batch().v, params.output.Feature().v,
- params.output.Z().v * params.output.Y().v * params.output.X().v};
+ dispatchData.gws = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.Z().v * params.output.Y().v * params.output.X().v };
- auto lws = GetOptimalLocalWorkGroupSizes(gws, params.engineInfo);
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
- runInfo.gws0 = gws[0];
- runInfo.gws1 = gws[1];
- runInfo.gws2 = gws[2];
-
- runInfo.lws0 = lws[0];
- runInfo.lws1 = lws[1];
- runInfo.lws2 = lws[2];
-
- return runInfo;
+ return dispatchData;
}
JitConstants StridedSliceKernelRef::GetJitConstants(const strided_slice_params& params) const {
assert(params.GetType() == KernelType::STRIDED_SLICE);
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
}
CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const {
- CommonDispatchData runInfo;
+ CommonDispatchData dispatchData;
auto in = params.inputs[0];
}
if (inner_size > 1) {
- runInfo.gws0 = outer_size;
- runInfo.gws1 = inner_size;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = outer_size;
+ dispatchData.gws[1] = inner_size;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = 1;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 1;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
} else {
- runInfo.gws0 = Align(outer_size, 16);
- runInfo.gws1 = 1;
- runInfo.gws2 = 1;
+ dispatchData.gws[0] = Align(outer_size, 16);
+ dispatchData.gws[1] = 1;
+ dispatchData.gws[2] = 1;
- runInfo.lws0 = 16;
- runInfo.lws1 = 1;
- runInfo.lws2 = 1;
+ dispatchData.lws[0] = 16;
+ dispatchData.lws[1] = 1;
+ dispatchData.lws[2] = 1;
}
- runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
-
- return runInfo;
+ return dispatchData;
}
JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const {
KernelData kd = KernelData::Default<tile_params>(params);
tile_params& newParams = *static_cast<tile_params*>(kd.params.get());
- auto runInfo = SetDefault(newParams, options);
+ auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto cldnn_jit = GetJitConstants(newParams);
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
return kernel_string;
}
-static void Check_RunInfoData(const std::string& kernelName, const kernel_selector::CommonDispatchData& runInfo) {
- if (runInfo.lws0 * runInfo.lws1 * runInfo.lws2 > 256) {
- std::cout << "ERROR: dispatch data for kernel: " << kernelName << " LWS cannot be greater than 256!\n"
- << std::endl;
- }
- if (runInfo.gws0 == 0 || runInfo.gws1 == 0 || runInfo.gws2 == 0 || runInfo.lws0 == 0 || runInfo.lws1 == 0 ||
- runInfo.lws2 == 0) {
- std::cout << "ERROR: dispatch data for kernel: " << kernelName << " dispatch data cannot contain zeros!"
- << std::endl;
- }
- if (runInfo.gws0 % runInfo.lws0 != 0) {
- std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS0: " << runInfo.gws0
- << " LWS0: " << runInfo.lws0 << std::endl;
- }
- if (runInfo.gws1 % runInfo.lws1 != 0) {
- std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS1: " << runInfo.gws1
- << " LWS1: " << runInfo.lws1 << std::endl;
- }
- if (runInfo.gws2 % runInfo.lws2 != 0) {
- std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS2: " << runInfo.gws2
- << " LWS2: " << runInfo.lws2 << std::endl;
- }
-}
-
uint32_t common_kernel_base::GetFusedPrimitiveInputsCount(const Params ¶ms) const {
auto p = dynamic_cast<const base_params&>(params);
uint32_t fused_deps_total = 0;
}
void common_kernel_base::FillCLKernelData(clKernelData& kernel,
- const CommonDispatchData& runInfo,
+ const CommonDispatchData& dispatchData,
const EngineInfo& engine_info,
const std::string& kernelMapName,
const std::string& jit,
bool bias,
int number_of_inputs,
uint32_t number_of_inputs_for_fused_prims) const {
- Check_RunInfoData(kernelMapName, runInfo);
- kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
- kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+ CheckDispatchData(kernelMapName, dispatchData);
+ kernel.workGroups.global = dispatchData.gws;
+ kernel.workGroups.local = dispatchData.lws;
kernel.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode);
- kernel.arguments =
- GetArgsDesc(number_of_inputs, weights, bias, number_of_inputs_for_fused_prims);
+ kernel.arguments = GetArgsDesc(number_of_inputs, weights, bias, number_of_inputs_for_fused_prims);
}
} // namespace kernel_selector
#include <vector>
namespace kernel_selector {
-struct CommonDispatchData {
- // TODO: change it to std::vector<size_t>
- size_t gws0, gws1, gws2;
- size_t lws0, lws1, lws2;
- bool
- fp16UnitUsed; ///< Value indicating that FP16 half precision floating point type will be used (instead of single precision).
- float efficiency;
-
- CommonDispatchData() : gws0(0), gws1(0), gws2(0), lws0(0), lws1(0), lws2(0), fp16UnitUsed(false), efficiency(0.0f){}
-};
class common_kernel_base : public KernelBase {
public:
uint32_t GetFusedPrimitiveInputsCount(const Params ¶ms) const;
void FillCLKernelData(clKernelData& kernel,
- const CommonDispatchData& runInfo,
+ const CommonDispatchData& dispatchData,
const EngineInfo& engine_info,
const std::string& kernel_map_name,
const std::string& jit,
std::string calcFunction;
WeightIndexFuncDesc() = default;
- WeightIndexFuncDesc(const WeightsLayout l) {
+ WeightIndexFuncDesc(std::string tensor_name, const WeightsLayout l) {
+ const auto layout_name = toString(l);
using args = std::initializer_list<std::string>;
- if (l == WeightsLayout::oiyx || l == WeightsLayout::oizyx || l == WeightsLayout::goiyx ||
+ if (l == WeightsLayout::oiyx ||
+ l == WeightsLayout::oizyx ||
+ l == WeightsLayout::goiyx ||
l == WeightsLayout::goizyx) {
args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x"};
- const auto name = toString(l);
- this->calcFunction = FuncBody(name);
- this->macroName = MacroName(name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
this->macroBody = R"V0G0N( \
CAT(prefix, _OFFSET) + \
(x)*CAT(prefix, _X_PITCH) + \
} else if (l == WeightsLayout::os_is_yx_isv16_osv16 || l == WeightsLayout::os_is_zyx_isv16_osv16 ||
l == WeightsLayout::g_os_is_yx_isv16_osv16 || l == WeightsLayout::g_os_is_zyx_isv16_osv16) {
args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x", "sub_group_size"};
- const auto name = toString(l);
- this->calcFunction = FuncBody(name);
- this->macroName = MacroName(name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
this->macroBody = R"V0G0N( \
CAT(prefix, _OFFSET) + \
(g)*CAT(prefix, _GROUPS_PITCH) + \
l == WeightsLayout::os_iyx_osv32__ai32 || l == WeightsLayout::g_os_iyx_osv16 ||
l == WeightsLayout::g_os_iyx_osv32) {
args macroNameArgs = {"prefix", "g", "o", "i", "y", "x", "sub_group_size"};
- const auto name = toString(l);
- this->calcFunction = FuncBody(name);
- this->macroName = MacroName(name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
this->macroBody = R"V0G0N( \
CAT(prefix, _OFFSET) + \
(g * CAT(prefix, _GROUPS_PITCH)) + \
} else if (l == WeightsLayout::is_os_yx_isv16_osv16 || l == WeightsLayout::is_os_zyx_isv16_osv16 ||
l == WeightsLayout::g_is_os_yx_isv16_osv16 || l == WeightsLayout::g_is_os_zyx_isv16_osv16) {
args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x", "sub_group_size"};
- const auto name = toString(l);
- this->calcFunction = FuncBody(name);
- this->macroName = MacroName(name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
this->macroBody = R"V0G0N( \
CAT(prefix, _OFFSET) + \
(g)*CAT(prefix, _GROUPS_PITCH) + \
l == WeightsLayout::os_is_zyx_osv64_isv16) {
args macroNameArgs = {"prefix", "o", "i", "z", "y", "x"};
args funcArgs = {"o", "i", "z", "y", "x", "x_size", "y_size", "z_size", "i_size", "o_size", "osv_size", "isv_size"};
- const auto name = toString(l);
const auto body = R"V0G0N( \
const uint isv = i % isv_size; \
const uint osv = o % osv_size; \
os * os_pitch; \
return output_offset; \
)V0G0N";
- this->macroName = MacroName(name, macroNameArgs);
- this->calcFunction = FuncBody(name, funcArgs, body);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name, funcArgs, body);
if (l == WeightsLayout::os_is_yx_osv16_isv16)
- this->macroBody = FuncCall(name, {"o", "i", "0", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), "1", Cat("_IFM_NUM"), Cat("_OFM_NUM"), "16", "16"});
+ this->macroBody = FuncCall(layout_name, {"o", "i", "0", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), "1", Cat("_IFM_NUM"), Cat("_OFM_NUM"), "16", "16"});
else if (l == WeightsLayout::os_is_zyx_osv32_isv16)
- this->macroBody = FuncCall(name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "32", "16"});
+ this->macroBody = FuncCall(layout_name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "32", "16"});
else if (l == WeightsLayout::os_is_zyx_osv64_isv16)
- this->macroBody = FuncCall(name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "64", "16"});
+ this->macroBody = FuncCall(layout_name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "64", "16"});
} else if (l == WeightsLayout::g_os_zyx_is_osv16_isv16 || l == WeightsLayout::g_os_zyx_is_osv16_isv32 ||
l == WeightsLayout::g_os_zyx_is_osv32_isv16 || l == WeightsLayout::g_os_zyx_is_osv32_isv32) {
args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x"};
args funcArgs = {"g", "o", "i", "z", "y", "x", "g_size", "o_size", "i_size", "z_size", "y_size", "x_size", "osv", "isv"};
- const auto name = toString(l);
const auto body = R"V0G0N( \
uint is_size = (i_size + isv - 1) / isv; \
uint os_size = (o_size + osv - 1) / osv; \
index += g * g_pitch; \
return index; \
)V0G0N";
- this->macroName = MacroName(name, macroNameArgs);
- this->calcFunction = FuncBody(name, funcArgs, body);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name, funcArgs, body);
std::string osv = "16", isv = "16";
if (l == WeightsLayout::g_os_zyx_is_osv16_isv16) {
osv = "16"; isv = "16";
} else if (l == WeightsLayout::g_os_zyx_is_osv32_isv32) {
osv = "32"; isv = "32";
}
- this->macroBody = FuncCall(name, {"g", "o", "i", "z", "y", "x", Cat("_GROUPS_NUM"), Cat("_OFM_NUM"), Cat("_IFM_NUM"), Cat("_SIZE_Z"),
- Cat("_SIZE_Y"), Cat("_SIZE_X"), osv, isv});
+ this->macroBody = FuncCall(layout_name, {"g", "o", "i", "z", "y", "x", Cat("_GROUPS_NUM"), Cat("_OFM_NUM"), Cat("_IFM_NUM"), Cat("_SIZE_Z"),
+ Cat("_SIZE_Y"), Cat("_SIZE_X"), osv, isv});
} else if (l == WeightsLayout::os_is_yx_osv16_isv4 || l == WeightsLayout::os_is_yx_osv32_isv4) {
args macroNameArgs = {"prefix", "o", "i", "y", "x"};
args funcArgs = {"o", "i", "y", "x", "i_size", "o_size", "x_size", "otd"};
- const auto name = toString(l);
const auto body = R"V0G0N( \
uint out_depth_tile = o / otd; \
uint od = o - out_depth_tile * otd; \
+ id; \
return idx; \
)V0G0N";
- this->macroName = MacroName(name, macroNameArgs);
- this->calcFunction = FuncBody(name, funcArgs, body);
+ this->macroName = MacroName(tensor_name, layout_name, macroNameArgs);
+ this->calcFunction = FuncBody(layout_name, funcArgs, body);
if (l == WeightsLayout::os_is_yx_osv16_isv4)
- this->macroBody = FuncCall(name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "16"});
+ this->macroBody = FuncCall(layout_name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "16"});
else if (l == WeightsLayout::os_is_yx_osv32_isv4)
- this->macroBody = FuncCall(name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "32"});
+ this->macroBody = FuncCall(layout_name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "32"});
} else {
// throw error?
}
return "FUNC_CALL(" + name + ")(" + args_str + ")";
}
+ // Builds the index-macro signature string "GET_<tensor>_<layout>_INDEX(arg0, ..., argN)".
+ // The added tensor_name parameter lets different weight tensors (e.g. primary
+ // weights vs. fused-op weights) receive distinct, non-colliding macros.
- static const std::string MacroName(std::string name, std::initializer_list<std::string> args) {
+ static const std::string MacroName(std::string tensor_name, std::string layout_name, std::initializer_list<std::string> args) {
 std::string args_str = "";
 size_t counter = 0;
+ // Comma-join the argument names; no trailing separator after the last one.
 for (auto& arg : args)
 args_str += (++counter == args.size()) ? (arg) : (arg + ", ");
- return "GET_WEIGHTS_" + name + "_INDEX(" + args_str + ")";
+ return "GET_" + tensor_name + "_" + layout_name + "_INDEX(" + args_str + ")";
 }
static const std::string FuncBody(std::string name, std::initializer_list<std::string> args = {}, std::string body = "return 0;") {
std::string index_func_val;
auto layout = _tensor.GetLayout();
- WeightIndexFuncDesc indexFuncDesc {layout};
+ auto layout_str = toString(layout);
+ WeightIndexFuncDesc indexFuncDesc{_name, layout};
+ std::string called_func_name = "GET_" + _name + "_" + layout_str + "_INDEX";
if (WeightsTensor::DoesGroupDimExist(layout)) {
if (WeightsTensor::ChannelsCount(layout) <= 5) {
std::vector<Tensor::WeightsChannelName> grouped_4d_channels = {
bool is_grouped_4d_layout = is_common_nd_layout(grouped_4d_channels, layout);
if (is_grouped_4d_layout) {
index_macro_name = _name + "_GET_INDEX(g, o, i, y, x)";
- auto layout_str = toString(layout);
if (layout == WeightsLayout::goiyx)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x)";
else if (layout == WeightsLayout::g_os_is_yx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x, 16)";
else if (layout == WeightsLayout::g_os_iyx_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, y, x, 16)";
else if (layout == WeightsLayout::g_is_os_yx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x, 16)";
} else {
assert(0);
}
bool is_grouped_5d_layout = is_common_nd_layout(grouped_5d_channels, layout);
if (is_grouped_5d_layout) {
index_macro_name = _name + "_GET_INDEX(g, o, i, z, y, x)";
- auto layout_str = toString(layout);
if (layout == WeightsLayout::goizyx)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, z, y, x)";
else if (layout == WeightsLayout::g_os_is_zyx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, z, y, x, 16)";
else if (layout == WeightsLayout::g_is_os_zyx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", g, o, i, z, y, x, 16)";
} else {
assert(0);
}
bool is_common_4d_layout = is_common_nd_layout(base_4d_channels, layout);
if (is_common_4d_layout) {
index_macro_name = _name + "_GET_INDEX(o, i, y, x)";
- auto layout_str = toString(layout);
if (layout == WeightsLayout::oiyx)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, 0, y, x)";
else if (layout == WeightsLayout::os_is_yx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, 0, y, x, 16)";
else if (layout == WeightsLayout::os_iyx_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, y, x, 16)";
else if (layout == WeightsLayout::os_iyx_osv32 || layout == WeightsLayout::os_iyx_osv32__ai32)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, y, x, 32)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, y, x, 32)";
else if (layout == WeightsLayout::is_os_yx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, 0, y, x, 16)";
else if (layout == WeightsLayout::os_is_yx_osv16_isv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", o, i, 0, y, x)";
+ index_func_val = called_func_name + "(" + _name + ", o, i, 0, y, x)";
} else {
assert(0);
}
bool is_common_5d_layout = is_common_nd_layout(base_5d_channels, layout);
if (is_common_5d_layout) {
index_macro_name = _name + "_GET_INDEX(o, i, z, y, x)";
- auto layout_str = toString(layout);
if (layout == WeightsLayout::oizyx)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, z, y, x)";
else if (layout == WeightsLayout::os_is_zyx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, z, y, x, 16)";
else if (layout == WeightsLayout::is_os_zyx_isv16_osv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x, 16)";
+ index_func_val = called_func_name + "(" + _name + ", 0, o, i, z, y, x, 16)";
else if (layout == WeightsLayout::os_is_zyx_osv32_isv16 || layout == WeightsLayout::os_is_zyx_osv64_isv16)
- index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", o, i, z, y, x)";
+ index_func_val = called_func_name + "(" + _name + ", o, i, z, y, x)";
} else {
assert(0);
}
#include "kernel_base.h"
+#include <sstream>
+
namespace kernel_selector {
const primitive_db KernelBase::db;
thread_local size_t KernelBase::counter = 0;
+// Human-readable dump of dispatch sizes, e.g. "GWS(3): 64 32 1 LWS(3): 16 1 1 ".
+// Used by CheckDispatchData when composing diagnostic messages.
+std::string toString(const kernel_selector::CommonDispatchData& dispatchData) {
+    // Bind by const reference: plain `auto` here copied both vectors just to print them.
+    const auto& gws = dispatchData.gws;
+    const auto& lws = dispatchData.lws;
+    std::stringstream os;
+    os << "GWS(" << gws.size() << "): ";
+    for (auto e : gws) {
+        os << e << " ";
+    }
+    os << "LWS(" << lws.size() << "): ";
+    for (auto e : lws) {
+        os << e << " ";
+    }
+    return os.str();
+}
+
+// Validates dispatch data before it is handed to the OpenCL runtime:
+//  - GWS and LWS must both be rank-3,
+//  - the total work-group size lws[0]*lws[1]*lws[2] must not exceed 256,
+//  - no GWS/LWS dimension may be zero,
+//  - each GWS dimension must be divisible by the matching LWS dimension.
+// Throws std::runtime_error with a diagnostic that embeds the kernel name.
+void KernelBase::CheckDispatchData(const std::string& kernelName, const kernel_selector::CommonDispatchData& dispatchData) {
+    // Fixed: the original message concatenated kernelName + ": " + ": LWS...",
+    // producing a doubled colon; now matches the format of the other messages.
+    if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3)
+        throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName +
+                                 ": LWS and GWS size is expected to be equal to 3. Actual: " +
+                                 toString(dispatchData));
+
+    // NOTE(review): 256 is a conservative lower bound on CL_DEVICE_MAX_WORK_GROUP_SIZE;
+    // confirm against EngineInfo if devices with larger limits should be allowed.
+    if (dispatchData.lws[0] * dispatchData.lws[1] * dispatchData.lws[2] > 256) {
+        throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName +
+                                 ": LWS cannot be greater than 256. Actual: " +
+                                 toString(dispatchData));
+    }
+    for (size_t i = 0; i < dispatchData.gws.size(); i++) {
+        if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0)
+            throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName +
+                                     ": Dispatch data cannot contain zeros. Actual: " +
+                                     toString(dispatchData));
+
+        // Required by clEnqueueNDRangeKernel: global size must be a multiple of local size.
+        if (dispatchData.gws[i] % dispatchData.lws[i] != 0)
+            throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName +
+                                     ": GWS must be divisible by corresponding LWS. Actual: " +
+                                     toString(dispatchData));
+    }
+}
+
static bool IsTypeUsedIn(Datatype type, const base_params& params) {
return params.output.GetDType() == type ||
std::any_of(params.inputs.begin(), params.inputs.end(), [=](const DataTensor& input) -> bool {
namespace kernel_selector {
using primitive_db = kernel_selector::gpu::cache::primitive_db;
+// Per-kernel dispatch configuration shared by all kernel implementations.
+struct CommonDispatchData {
+    // Global work sizes; expected rank 3 (see KernelBase::CheckDispatchData).
+    std::vector<size_t> gws;
+    // Local work sizes; expected rank 3 with product <= 256.
+    std::vector<size_t> lws;
+    // Relative score used when choosing between candidate kernels.
+    float efficiency;
+
+    // Zero-filled sizes are deliberately invalid (CheckDispatchData rejects
+    // zeros), forcing each kernel's SetDefault to assign real values.
+    CommonDispatchData() : gws({0, 0, 0}), lws({0, 0, 0}), efficiency(0.0f) {}
+};
+
+std::string toString(const kernel_selector::CommonDispatchData& dispatchData);
+
+// Stream insertion for CommonDispatchData; delegates to toString().
+// Takes the argument by const reference: the by-value original copied both
+// gws and lws vectors on every insertion (and misspelled the parameter name).
+static inline std::ostream &operator<<(std::ostream &os, const CommonDispatchData& dispatchData) {
+    return os << toString(dispatchData);
+}
+
class KernelBase {
public:
using FusedOpType = KernelType;
static const primitive_db db;
const std::string kernelName;
+ static void CheckDispatchData(const std::string& kernelName, const kernel_selector::CommonDispatchData& dispatchData);
static size_t UniqeID() { return counter++; } // TODO: use interlocked
virtual Datatype GetUnitType(const base_params& params) const;