From: Vladimir Paramuzov Date: Mon, 19 Oct 2020 15:45:05 +0000 (+0300) Subject: [IE CLDNN] DispatchData refactoring (#2508) X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9367266ed5e55954de1c4737d10752aa86a5abbd;p=platform%2Fupstream%2Fdldt.git [IE CLDNN] DispatchData refactoring (#2508) --- diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp index 31b2041..751278c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp @@ -23,31 +23,21 @@ namespace kernel_selector { ActivationKernelBase::DispatchData ActivationKernelBase::SetDefault(const activation_params& arg) const { const auto& out = arg.output; - DispatchData runInfo; - std::vector global; - std::vector local; + DispatchData dispatchData; if (out.GetLayout() == DataLayout::yxfb) { - global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v}; - local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo); + dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v}; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo); } else if (out.GetLayout() == DataLayout::b_fs_yx_fsv16) { - global = {Align(out.Feature().v, 16) * out.Batch().v, out.X().v, out.Y().v}; - local = {16, 1, 1}; + dispatchData.gws = {Align(out.Feature().v, 16) * out.Batch().v, out.X().v, out.Y().v}; + dispatchData.lws = {16, 1, 1}; } else { - global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; - local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo); + dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; + dispatchData.lws = 
GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo); } - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - runInfo.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - runInfo.fp16UnitUsed = out.GetDType() == Datatype::F16; - - return runInfo; + return dispatchData; } JitConstants ActivationKernelBase::GetJitConstants(const activation_params& params, DispatchData) const { @@ -94,20 +84,20 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con activation_params& newParams = *static_cast(kd.params.get()); const std::string kernel_id = GetEntryPoint(kernelName, params.layerID, options); - auto runInfo = SetDefault(newParams); - auto cldnn_jit = GetJitConstants(newParams, runInfo); + auto dispatchData = SetDefault(newParams); + auto cldnn_jit = GetJitConstants(newParams, dispatchData); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); if (!newParams.inputActivationParams.empty()) { kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0}); } - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.h index 2ae9244..3059e5d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.h +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.h @@ -65,7 +65,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - virtual JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const activation_params& arg) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const; }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp index cbe1707..57aaba8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp @@ -38,24 +38,16 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const { } ActivationKernelOpt::Parent::DispatchData ActivationKernelOpt::SetDefault(const activation_params& params) const { - auto runInfo = Parent::SetDefault(params); + auto dispatchData = Parent::SetDefault(params); const auto totalSize = params.inputs[0].LogicalSize(); - std::vector global = {totalSize / NUM_COLS_WI}; - std::vector local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { totalSize / NUM_COLS_WI, 1, 1 }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.efficiency = FORCE_PRIORITY_6; - runInfo.lws0 = local[0]; - runInfo.lws1 = 1; - runInfo.lws2 = 1; - - runInfo.efficiency = FORCE_PRIORITY_6; - - return runInfo; + return dispatchData; } bool ActivationKernelOpt::Validate(const 
Params& p, const optional_params& o) const { @@ -87,8 +79,8 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co return true; } -JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData kd) const { - auto jit = ActivationKernelBase::GetJitConstants(params, kd); +JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData dispatchData) const { + auto jit = ActivationKernelBase::GetJitConstants(params, dispatchData); auto input_dt = params.inputs[0].GetDType(); jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h index 51545be..7a4a9bc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h @@ -33,7 +33,7 @@ protected: static const int NUM_COLS_WI = 4; DispatchData SetDefault(const activation_params& arg) const override; bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override; std::vector GetSupportedFusedOps() const override { return {FusedOpType::QUANTIZE, FusedOpType::SCALE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp index 89f019c..9e35b7b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp @@ -41,8 +41,8 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const { return k; } -JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData kd) const { - auto jit = ActivationKernelBase::GetJitConstants(params, kd); +JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData dispatchData) const { + auto jit = ActivationKernelBase::GetJitConstants(params, dispatchData); auto input_dt = params.inputs[0].GetDType(); if (!params.fused_ops.empty()) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h index e8e170b..0f94622 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h @@ -27,7 +27,7 @@ public: KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; - JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override; std::vector GetSupportedFusedOps() const override { return {FusedOpType::QUANTIZE, FusedOpType::SCALE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp index 15fc570..7e2aff5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp @@ -86,22 +86,11 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti } const arg_max_min_params& orgParams = static_cast(params); - DispatchData runInfo; - runInfo.fp16UnitUsed = orgParams.inputs[0].GetDType() == Datatype::F16; - size_t sort_size = orgParams.argMaxMinSortType == ArgMaxMinSortType::VALUE ? getSortSize(orgParams) : 1; - std::vector local, global; - global = { Align(getOperationNumber(orgParams), 32), sort_size, 1 }; - local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; + DispatchData dispatchData; + dispatchData.gws = { Align(getOperationNumber(orgParams), 32), sort_size, 1 }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); KernelData kd = KernelData::Default(params); @@ -110,7 +99,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); if (orgParams.outputs_num == 2) { kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp index 257e1cb..2d0c3e5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -37,20 +37,12 @@ JitConstants ArgMaxMinKernelBase::GetJitConstants(const arg_max_min_params& para } ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelBase::SetDefault(const arg_max_min_params& params) const { - DispatchData kd; + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + dispatchData.gws = { 128, params.inputs[0].Batch().v, 1 }; + dispatchData.lws = { 128, 1, 1 }; - // Determine global work sizes. - kd.gws0 = 128; - kd.gws1 = params.inputs[0].Batch().v; - kd.gws2 = 1; - - kd.lws0 = 128; - kd.lws1 = 1; - kd.lws2 = 1; - - return kd; + return dispatchData; } KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const { @@ -60,7 +52,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons const arg_max_min_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); @@ -69,7 +61,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp index ac03f6f..30938a2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -37,11 +37,11 @@ KernelsData ArgMaxMinKernelOpt::GetKernelsData(const Params& params, const optio const arg_max_min_params& orgParams = static_cast(params); - int topK = orgParams.topK; - long size = (long)(orgParams.inputs[0].X().v * orgParams.inputs[0].Y().v * orgParams.inputs[0].Feature().v) / 8; - long outSize = size / 16 * topK; + size_t topK = orgParams.topK; + size_t size = (size_t)(orgParams.inputs[0].X().v * orgParams.inputs[0].Y().v * orgParams.inputs[0].Feature().v) / 8; + size_t outSize = size / 16 * topK; int kernelAmount = 1; - for (; outSize > 128; outSize = (long)((outSize / 128 + 1) * topK)) { + for (; outSize > 128; outSize = (size_t)((outSize / 128 + 1) * topK)) { kernelAmount++; } KernelData kd = KernelData::Default(params, kernelAmount); @@ -57,22 +57,15 @@ KernelsData ArgMaxMinKernelOpt::GetKernelsData(const Params& params, const optio newParams.inputs[0] = input; auto& kernel = kd.kernels[i]; - DispatchData runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto cldnnJit = GetJitConstants(newParams); auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - runInfo.fp16UnitUsed = orgParams.inputs[0].GetDType() == Datatype::F16; + dispatchData.gws = { Align(size, 16), orgParams.inputs[0].Batch().v, 1 }; + dispatchData.lws = { 16, 
1, 1 }; - runInfo.gws0 = Align(size, 16); - runInfo.gws1 = orgParams.inputs[0].Batch().v; // B - runInfo.gws2 = 1; - - runInfo.lws0 = 16; - runInfo.lws1 = 1; - runInfo.lws2 = 1; - - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint); size = (size / 128 + 1) * topK; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp index b73ce22..7b6a475 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -38,32 +38,30 @@ AverageUnpoolingKernelBase::DispatchData AverageUnpoolingKernelBase::SetDefault( const average_unpooling_params& params) const { const auto& input = params.inputs[0]; - DispatchData kd; + DispatchData dispatchData; if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) { // Determine global work sizes. - kd.gws2 = input.Batch().v * input.Feature().v; // B, F - kd.gws0 = Align(input.X().v, 32); // X - kd.gws1 = input.Y().v; // Y + dispatchData.gws = { Align(input.X().v, 32), // X + input.Y().v, // Y + input.Batch().v * input.Feature().v, // B, F + }; - kd.lws0 = 32; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws = { 32, 1, 1 }; } else { // Determine global work sizes. 
- kd.gws0 = input.Batch().v * input.Feature().v; // B, F - kd.gws1 = input.X().v; // X - kd.gws2 = input.Y().v; // Y - - kd.lws0 = std::min(std::max(kd.gws0, static_cast(1)), static_cast(32)); - while (kd.gws0 % kd.lws0 != 0) { - --kd.lws0; + dispatchData.gws = { input.Batch().v * input.Feature().v, // B, F + input.X().v, // X + input.Y().v }; // Y + + dispatchData.lws = {1, 1, 1}; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - kd.lws1 = 1; - kd.lws2 = 1; } - return kd; + return dispatchData; } KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& params, @@ -75,7 +73,7 @@ KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& param const average_unpooling_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); @@ -84,10 +82,10 @@ KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& param auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = estimatedTime; return {kd}; } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_to_space/batch_to_space_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_to_space/batch_to_space_kernel_base.cpp index ffbeb87..1b7f0bc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_to_space/batch_to_space_kernel_base.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_to_space/batch_to_space_kernel_base.cpp @@ -41,27 +41,16 @@ bool BatchToSpaceKernelBase::Validate(const Params& p, const optional_params& o) CommonDispatchData BatchToSpaceKernelBase::SetDefault(const batch_to_space_params& params, const optional_params&) const { const auto& out = params.output; - CommonDispatchData runInfo; - std::vector global; - std::vector local; - + CommonDispatchData dispatchData; if (out.GetLayout() == DataLayout::b_fs_yx_fsv16 && out.Feature().v % 16 == 0) { - global = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v }; - local = {1, 16, 1}; + dispatchData.gws = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v }; + dispatchData.lws = { 1, 16, 1 }; } else { - global = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v }; - local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); } - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants BatchToSpaceKernelBase::GetJitConstants(const batch_to_space_params& params) const { @@ -101,14 +90,14 @@ KernelsData BatchToSpaceKernelBase::GetCommonKernelsData(const Params& params, c return {}; } - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, + FillCLKernelData(kernel, 
dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 1, GetFusedPrimitiveInputsCount(params)); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.cpp index 15e6c48..9fb6b25 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.cpp @@ -43,10 +43,8 @@ ParamsKey BinaryConvolutionKernel1x1::GetSupportedKey() const { return k; } -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1::SetDefault( - const binary_convolution_params& params, - int) const { - DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params); +BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1::SetDefault(const binary_convolution_params& params, int) const { + DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -55,17 +53,17 @@ BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1::SetDefault auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = Align(x * y, sub_group_size); - kd.gws1 = CeilDiv(f, 2 * sub_group_size); // 1 WI calcs 32 OC - kd.gws2 = b; + dispatchData.gws[0] = Align(x * y, sub_group_size); + dispatchData.gws[1] = CeilDiv(f, 2 * sub_group_size); // 1 WI calcs 32 OC + dispatchData.gws[2] = b; - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return kd; + return dispatchData; } bool 
BinaryConvolutionKernel1x1::Validate(const Params& p, const optional_params& o) const { @@ -89,8 +87,8 @@ bool BinaryConvolutionKernel1x1::Validate(const Params& p, const optional_params } JitConstants BinaryConvolutionKernel1x1::GetJitConstants(const binary_convolution_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size))); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.h index 7be1117..fe47b1b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.h @@ -35,9 +35,9 @@ protected: return WeightsLayout::os_is_yx_osv32_isv32p; } JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& kd) const override; + const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp index bf680a4..ccf6420 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp @@ -46,7 +46,7 @@ ParamsKey BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetSupportedKey() const { BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1_b_fs_yx_fsv16::SetDefault( const binary_convolution_params& params, int) const { - DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -55,17 +55,15 @@ BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1_b_fs_yx_fsv auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = Align(x * y, sub_group_size); - kd.gws1 = CeilDiv(f, sub_group_size); // 1 WI calcs 16 OC - kd.gws2 = b; + dispatchData.gws[0] = Align(x * y, sub_group_size); + dispatchData.gws[1] = CeilDiv(f, sub_group_size); // 1 WI calcs 16 OC + dispatchData.gws[2] = b; - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws = { sub_group_size, 1, 1 }; - kd.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return kd; + return dispatchData; } bool BinaryConvolutionKernel1x1_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const { @@ -89,8 +87,8 @@ bool BinaryConvolutionKernel1x1_b_fs_yx_fsv16::Validate(const Params& p, const o } JitConstants BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetJitConstants(const binary_convolution_params& params, - const 
DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size))); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h index 182267f..74cc9b9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h @@ -35,9 +35,9 @@ protected: return WeightsLayout::os_is_yx_osv32_isv32p; } JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& kd) const override; + const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp index 3ee6895..fe6f349 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp @@ -43,9 +43,9 @@ bool BinaryConvolutionKernelBase::Validate(const Params& p, const optional_param } JitConstants BinaryConvolutionKernelBase::GetJitConstants(const binary_convolution_params& params, - const DispatchData& kd) const { + const DispatchData& dispatchData) const { JitConstants jit = WeightBiasKernelBase::GetJitConstants(params); - jit.Merge(GetFusedPrimitivesJitConstants(params, kd)); + jit.Merge(GetFusedPrimitivesJitConstants(params, dispatchData)); jit.AddConstants({ MakeJitConstant("STRIDE", params.stride), @@ -63,25 +63,25 @@ JitConstants BinaryConvolutionKernelBase::GetFusedPrimitivesJitConstants(const b return {}; } -bool BinaryConvolutionKernelBase::CheckWorkGroups(const BinaryConvolutionKernelBase::DispatchData& kd) { - if (kd.gws0 == 0 || kd.gws1 == 0 || kd.gws2 == 0 || kd.lws0 == 0 || kd.lws1 == 0 || kd.lws2 == 0) { +bool BinaryConvolutionKernelBase::CheckWorkGroups(const BinaryConvolutionKernelBase::DispatchData& dispatchData) { + if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3) return false; - } - if ((kd.gws0 % kd.lws0) != 0 || (kd.gws1 % kd.lws1) != 0 || (kd.gws2 % kd.lws2) != 0) { - return false; + for (size_t i = 0; i < dispatchData.gws.size(); i++) { + if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0) + return false; + if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0) + return false; } return true; } -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefault( - const binary_convolution_params& params, - int) const { - DispatchData kd; +BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefault(const binary_convolution_params& params, + int) const { + DispatchData dispatchData; const auto& out = params.output; 
- kd.fp16UnitUsed = out.GetDType() == Datatype::F16; std::vector global; if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf) { global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v}; @@ -91,28 +91,23 @@ BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefaul auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - kd.cldnnStyle.blockWidth = 1; - kd.cldnnStyle.blockHeight = 1; - kd.cldnnStyle.prefetch = 0; - kd.cldnnStyle.inputBlockArraySize = 0; - kd.cldnnStyle.inputBlockWidth = 0; - - kd.gemmStyle.globalWorkSizeDX = 1; - kd.gemmStyle.globalWorkSizeDY = 1; - kd.gemmStyle.globalWorkSizeDZ = 1; - kd.gemmStyle.subBlockDimK = 1; - kd.gemmStyle.subBlockDimM = 0; - kd.gemmStyle.subBlockDimN = 0; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - return kd; + dispatchData.gws = global; + dispatchData.lws = local; + + dispatchData.cldnnStyle.blockWidth = 1; + dispatchData.cldnnStyle.blockHeight = 1; + dispatchData.cldnnStyle.prefetch = 0; + dispatchData.cldnnStyle.inputBlockArraySize = 0; + dispatchData.cldnnStyle.inputBlockWidth = 0; + + dispatchData.gemmStyle.globalWorkSizeDX = 1; + dispatchData.gemmStyle.globalWorkSizeDY = 1; + dispatchData.gemmStyle.globalWorkSizeDZ = 1; + dispatchData.gemmStyle.subBlockDimK = 1; + dispatchData.gemmStyle.subBlockDimM = 0; + dispatchData.gemmStyle.subBlockDimN = 0; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + return dispatchData; } KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& params, @@ -129,9 +124,9 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para if (NeedPaddedInput()) { kd.reorderInput = CovolutionBinaryUpdateInputParams(newParams); } - DispatchData runInfo = SetDefault(newParams, autoTuneIndex); + DispatchData 
dispatchData = SetDefault(newParams, autoTuneIndex); - if (!CheckWorkGroups(runInfo)) { + if (!CheckWorkGroups(dispatchData)) { // Internal Error - wrong calculation of global/local work group sizes return {}; } @@ -147,7 +142,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para } auto finalKernelName = GetKernelName(newParams); - auto cldnnJit = GetJitConstants(newParams, runInfo); + auto cldnnJit = GetJitConstants(newParams, dispatchData); auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint); @@ -161,7 +156,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para } FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, finalKernelName, jit, @@ -173,7 +168,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para fused_deps_total); kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; kd.autoTuneIndex = autoTuneIndex; return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.h index ffa92e0..b8ff2d3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.h @@ -66,9 +66,9 @@ protected: virtual std::string GetKernelName(const binary_convolution_params&) const { return kernelName; } virtual bool NeedPaddedInput() const { return false; } bool Validate(const Params& p, const optional_params& o) const override; - virtual JitConstants GetJitConstants(const 
binary_convolution_params& params, const DispatchData& kd) const; + virtual JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const; virtual JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& kd) const; + const DispatchData& dispatchData) const; virtual DispatchData SetDefault(const binary_convolution_params& params, int autoTuneIndex = -1) const; static bool CheckWorkGroups(const DispatchData&); KernelsData GetCommonKernelsData(const Params& params, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.cpp index dbbd4bc..85535b9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.cpp @@ -43,10 +43,9 @@ ParamsKey BinaryConvolutionKernelGeneric::GetSupportedKey() const { return k; } -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelGeneric::SetDefault( - const binary_convolution_params& params, - int) const { - DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params); +BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelGeneric::SetDefault(const binary_convolution_params& params, + int) const { + DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -55,17 +54,17 @@ BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelGeneric::SetDef auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = Align(x, sub_group_size) * y; - kd.gws1 = CeilDiv(f, 2 * sub_group_size); // 1 WI calc 2 OC x 16 X - kd.gws2 = b; + dispatchData.gws[0] = 
Align(x, sub_group_size) * y; + dispatchData.gws[1] = CeilDiv(f, 2 * sub_group_size); // 1 WI calc 2 OC x 16 X + dispatchData.gws[2] = b; - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } bool BinaryConvolutionKernelGeneric::Validate(const Params& p, const optional_params& o) const { @@ -81,8 +80,8 @@ bool BinaryConvolutionKernelGeneric::Validate(const Params& p, const optional_pa } JitConstants BinaryConvolutionKernelGeneric::GetJitConstants(const binary_convolution_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); auto input = params.inputs[0]; auto output = params.output; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.h index fdbc153..62f0863 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.h @@ -35,9 +35,9 @@ protected: return WeightsLayout::os_is_yx_osv32_isv32p; } JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& kd) const override; + const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const 
binary_convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.cpp index 47870bb..fad9ce3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.cpp @@ -38,10 +38,9 @@ ParamsKey BinaryConvolutionKernelRef::GetSupportedKey() const { return k; } -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelRef::SetDefault( - const binary_convolution_params& params, - int) const { - DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params); +BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelRef::SetDefault(const binary_convolution_params& params, + int) const { + DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -50,22 +49,22 @@ BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelRef::SetDefault auto y = out.Y().v; auto x = out.X().v; - kd.gws0 = b; - kd.gws1 = f; - kd.gws2 = x * y; + dispatchData.gws[0] = b; + dispatchData.gws[1] = f; + dispatchData.gws[2] = x * y; - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - return kd; + return dispatchData; } JitConstants BinaryConvolutionKernelRef::GetJitConstants(const binary_convolution_params& params, - const 
DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); int pad_physical_val = params.pad_value == -1.0f ? 0x00000000 : 0xFFFFFFFF; int leftovers_mask = (0xFFFFFFFF >> (32 - params.inputs[0].Feature().v % 32)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.h index 7ce702f..0923186 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.h @@ -35,9 +35,9 @@ protected: return WeightsLayout::os_is_yx_osv32_isv32p; } JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& kd) const override; + const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp index 5f5e414..16e1c38 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,22 +31,12 @@ JitConstants BorderKernelBase::GetJitConstants(const border_params& params) cons BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params& params) const { const auto& output = params.output; - DispatchData kd; + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + dispatchData.gws = { output.X().v * output.Z().v, output.Y().v * output.W().v, output.Batch().v * output.Feature().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - std::vector<size_t> global{output.X().v * output.Z().v, output.Y().v * output.W().v, output.Batch().v * output.Feature().v}; - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params, @@ -57,7 +47,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params, const auto& prim_params = static_cast<const border_params&>(params); - auto run_info = SetDefault(prim_params); + auto dispatchData = SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -65,7 +55,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, 
params.engineInfo, kernelName, jit, entry_point); k_data.estimatedTime = estimated_time; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp index 6c8c69f..b0b7ce1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2019 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,22 +28,12 @@ JitConstants BroadcastKernelBase::GetJitConstants(const broadcast_params& params BroadcastKernelBase::DispatchData BroadcastKernelBase::SetDefault(const broadcast_params& params) { const auto& output = params.output; - DispatchData kd; + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + dispatchData.gws = { output.X().v, output.Y().v * output.Z().v, output.Batch().v * output.Feature().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - std::vector<size_t> global{output.X().v, output.Y().v * output.Z().v, output.Batch().v * output.Feature().v}; - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params, @@ -54,7 +44,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params, const auto& prim_params = static_cast<const broadcast_params&>(params); - auto run_info = SetDefault(prim_params); + auto 
dispatchData = SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -62,7 +52,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); k_data.estimatedTime = estimated_time; return {k_data}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_b_fs_yx_fsv16.cpp index 57fc050..1cc9811 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_b_fs_yx_fsv16.cpp @@ -109,23 +109,23 @@ bool ConcatenationKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional } ConcatenationKernelBase::DispatchData ConcatenationKernel_b_fs_yx_fsv16::SetDefault(const concatenation_params& params) const { - DispatchData runInfo = ConcatenationKernelBase::SetDefault(params); + DispatchData dispatchData = ConcatenationKernelBase::SetDefault(params); const auto& input = params.inputs[0]; auto tileXY = getTileXY(params); size_t tileF = params.misalignment == 0 ? 
1 : 2; - runInfo.gws0 = CeilDiv(input.X().v * input.Y().v, tileXY); - runInfo.gws1 = Align(input.Feature().v, 16 * tileF) / tileF; - runInfo.gws2 = input.Batch().v; + dispatchData.gws[0] = CeilDiv(input.X().v * input.Y().v, tileXY); + dispatchData.gws[1] = Align(input.Feature().v, 16 * tileF) / tileF; + dispatchData.gws[2] = input.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = 16; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 16; + dispatchData.lws[2] = 1; - runInfo.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return runInfo; + return dispatchData; } JitConstants ConcatenationKernel_b_fs_yx_fsv16::GetJitConstants(const concatenation_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp index 0eb3fb2..b70ac9f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp @@ -69,7 +69,7 @@ JitConstants ConcatenationKernelBase::GetJitConstants(const concatenation_params } ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const concatenation_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& dims = params.inputs[0].GetDims(); auto layout = params.inputs[0].GetLayout(); @@ -80,19 +80,19 @@ ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const DataTensor::Channelndex(layout, Tensor::DataChannelName::X) }; // Determine global work sizes. - kd.gws0 = idx[2] != -1 ? dims[idx[2]].v : 1; // Y - kd.gws1 = idx[1] != -1 ? dims[idx[1]].v : 1; // F - kd.gws2 = idx[0] != -1 ? dims[idx[0]].v : 1; // B + dispatchData.gws[0] = idx[2] != -1 ? 
dims[idx[2]].v : 1; // Y + dispatchData.gws[1] = idx[1] != -1 ? dims[idx[1]].v : 1; // F + dispatchData.gws[2] = idx[0] != -1 ? dims[idx[0]].v : 1; // B - kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32)); - while (kd.gws0 % kd.lws0 != 0) { - --kd.lws0; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - kd.lws1 = 1; - kd.lws2 = 1; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - return kd; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + return dispatchData; } KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const { @@ -120,13 +120,13 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, ifm_offset += ifm; auto& kernel = kd.kernels[i]; - DispatchData runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto cldnnJit = GetJitConstants(newParams); auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i }); kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); @@ -138,7 +138,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, kernel.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v; - efficiency = std::max(efficiency, runInfo.efficiency); + 
efficiency = std::max(efficiency, dispatchData.efficiency); } kd.estimatedTime = efficiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.cpp index 62e5a65..b5046ff 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.cpp @@ -67,22 +67,21 @@ bool ConcatenationKernel_depth_bfyx_no_pitch::Validate(const Params& p, const op return true; } -ConcatenationKernelBase::DispatchData ConcatenationKernel_depth_bfyx_no_pitch::SetDefault( - const concatenation_params& params) const { - DispatchData runInfo = ConcatenationKernelBase::SetDefault(params); +ConcatenationKernelBase::DispatchData ConcatenationKernel_depth_bfyx_no_pitch::SetDefault(const concatenation_params& params) const { + DispatchData dispatchData = ConcatenationKernelBase::SetDefault(params); const auto& input = params.inputs[0]; const auto batch = input.Batch().v; - runInfo.gws0 = batch; - runInfo.gws1 = Align(std::max((size_t)1, input.LogicalSize() / batch), 16 * 8) / 8; - runInfo.gws2 = 1; + dispatchData.gws[0] = batch; + dispatchData.gws[1] = Align(std::max((size_t)1, input.LogicalSize() / batch), 16 * 8) / 8; + dispatchData.gws[2] = 1; - runInfo.lws0 = 1; - runInfo.lws1 = 16; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 16; + dispatchData.lws[2] = 1; - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; - return runInfo; + return dispatchData; } KernelsData ConcatenationKernel_depth_bfyx_no_pitch::GetKernelsData(const Params& params, diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp index 7eb9e19..4285f52 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp @@ -62,20 +62,20 @@ bool ConcatenationKernel_fs_b_yx_fsv32::Validate(const Params& p, const optional } ConcatenationKernelBase::DispatchData ConcatenationKernel_fs_b_yx_fsv32::SetDefault(const concatenation_params& params) const { - DispatchData runInfo = ConcatenationKernelBase::SetDefault(params); + DispatchData dispatchData = ConcatenationKernelBase::SetDefault(params); const auto& input = params.inputs[0]; - runInfo.gws0 = input.X().v; - runInfo.gws1 = input.Y().v; - runInfo.gws2 = CeilDiv(input.Feature().v, fsv) * subGroupSize * input.Batch().v; + dispatchData.gws[0] = input.X().v; + dispatchData.gws[1] = input.Y().v; + dispatchData.gws[2] = CeilDiv(input.Feature().v, fsv) * subGroupSize * input.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = subGroupSize; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = subGroupSize; - runInfo.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return runInfo; + return dispatchData; } JitConstants ConcatenationKernel_fs_b_yx_fsv32::GetJitConstants(const concatenation_params& params) const { @@ -113,13 +113,13 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para ifm_offset += ifm; auto& kernel = kd.kernels[i]; - DispatchData runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto cldnnJit = GetJitConstants(newParams); auto entryPoint = 
GetEntryPoint(kernelName, newParams.layerID, optParams); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i}); kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); @@ -131,7 +131,7 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para kernel.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v; - efficiency = std::max(efficiency, runInfo.efficiency); + efficiency = std::max(efficiency, dispatchData.efficiency); } kd.estimatedTime = efficiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_simple_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_simple_ref.cpp index 36abefe..9d1fcfa 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_simple_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_simple_ref.cpp @@ -88,27 +88,17 @@ bool ConcatenationKernel_simple_Ref::Validate(const Params& p, const optional_pa } ConcatenationKernelBase::DispatchData ConcatenationKernel_simple_Ref::SetDefault(const concatenation_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.inputs[0]; - std::vector<size_t> global; - global = { - input.X().v * input.Y().v, - input.Z().v * input.W().v, - input.Feature().v * input.Batch().v}; - auto local = 
GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { input.X().v * input.Y().v, + input.Z().v * input.W().v, + input.Feature().v * input.Batch().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; // X * Y - kd.gws1 = global[1]; // Z * W - kd.gws2 = global[2]; // F * B + dispatchData.efficiency = FORCE_PRIORITY_9; - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - kd.efficiency = FORCE_PRIORITY_9; - - return kd; + return dispatchData; } KernelsData ConcatenationKernel_simple_Ref::GetKernelsData(const Params& params, const optional_params& optParams) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.cpp index 5ea9e20..8bf5083 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.cpp @@ -34,7 +34,7 @@ ConvolutionKernel_b_fs_yx_fsv16::ConvolutionKernel_b_fs_yx_fsv16() : Convolution } ConvolutionKernel_b_fs_yx_fsv16::AutoTuneOption ConvolutionKernel_b_fs_yx_fsv16::GetAutoTuneOptions(const Params& params, - int /*autoTuneIndex*/) const { + int /*autoTuneIndex*/) const { const convolution_params& cp = static_cast<const convolution_params&>(params); auto x = cp.output.X().v; auto f = cp.output.Feature().v; @@ -89,33 +89,33 @@ ParamsKey ConvolutionKernel_b_fs_yx_fsv16::GetSupportedKey() const { } ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16::SetDefault(const convolution_params& params, - int autoTuneIndex) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params); + int autoTuneIndex) const { + DispatchData dispatchData = 
ConvolutionKernelBase::SetDefault(params); const auto& out = params.output; auto autoTune = GetAutoTuneOptions(params, autoTuneIndex); - kd.cldnnStyle.blockWidth = autoTune.blockWidth; + dispatchData.cldnnStyle.blockWidth = autoTune.blockWidth; auto x = out.X().v; auto y = out.Y().v; auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = CeilDiv(x, autoTune.blockWidth) * y; - kd.gws1 = Align(f, sub_group_size); - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x, autoTune.blockWidth) * y; + dispatchData.gws[1] = Align(f, sub_group_size); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = sub_group_size; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; if (b == 1) - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; else - kd.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; - return kd; + return dispatchData; } bool ConvolutionKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const { @@ -155,12 +155,12 @@ bool ConvolutionKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_p } JitConstants ConvolutionKernel_b_fs_yx_fsv16::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { auto input = params.inputs[0]; auto output = params.output; - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); - auto blockWidth = runInfo.cldnnStyle.blockWidth; + auto blockWidth = dispatchData.cldnnStyle.blockWidth; if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); FusedOpsConfiguration conf_vec = { "_VEC", @@ -213,8 +213,8 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16::GetJitConstants(const convolution_ } KernelsData ConvolutionKernel_b_fs_yx_fsv16::GetTunedKernelsDataByIndex(const Params& params, - const optional_params& options, - const int autoTuneIndex) const { + 
const optional_params& options, + const int autoTuneIndex) const { auto tuneOptions = GetAutoTuneOptions(params, autoTuneIndex); return GetCommonKernelsData(params, options, tuneOptions.exeMode, autoTuneIndex); } @@ -224,7 +224,7 @@ KernelsData ConvolutionKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params } KernelsData ConvolutionKernel_b_fs_yx_fsv16::GetKernelsDataForAutoTune(const Params& params, - const optional_params& options) const { + const optional_params& options) const { if (!Validate(params, options)) { return {}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.h index b371a02..ca6a784 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16.h @@ -49,7 +49,7 @@ protected: bool NeedPaddedInput() const override { return false; } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; private: struct AutoTuneOption { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp index c3b1084..7d9a70a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp @@ -34,7 +34,7 @@ ConvolutionKernel_b_fs_yx_fsv16_1x1::ConvolutionKernel_b_fs_yx_fsv16_1x1() : Con } ConvolutionKernel_b_fs_yx_fsv16_1x1::AutoTuneOption ConvolutionKernel_b_fs_yx_fsv16_1x1::GetAutoTuneOptions(const Params& params, - int /*autoTuneIndex*/) const { + int /*autoTuneIndex*/) const { const convolution_params& cp = static_cast(params); auto x = cp.output.X().v; auto f = cp.output.Feature().v; @@ -73,10 +73,10 @@ ParamsKey ConvolutionKernel_b_fs_yx_fsv16_1x1::GetSupportedKey() const { ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16_1x1::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params); auto autoTune = GetAutoTuneOptions(params, autoTuneIndex); - kd.cldnnStyle.blockWidth = autoTune.blockWidth; + dispatchData.cldnnStyle.blockWidth = autoTune.blockWidth; const auto& input = params.inputs[0]; const auto& out = params.output; @@ -85,29 +85,29 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16_1x1::SetDefa auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = CeilDiv(x * y, autoTune.blockWidth); - kd.gws1 = Align(f, feature_block_size); - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x * y, autoTune.blockWidth); + dispatchData.gws[1] = Align(f, feature_block_size); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = sub_group_size; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; auto bBlockSizeX = x % autoTune.blockWidth == 0; auto bBlockSizeXY = out.X().pad.Total() + out.Y().pad.Total() == 0; auto bInputPad = input.X().pad.Total() + input.Y().pad.Total() != 0; - + if (b == 1) { if ((bBlockSizeX || bBlockSizeXY) && !bInputPad) { - kd.efficiency = FORCE_PRIORITY_1; + 
dispatchData.efficiency = FORCE_PRIORITY_1; } else { - kd.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; } } else { - kd.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; } - return kd; + return dispatchData; } bool ConvolutionKernel_b_fs_yx_fsv16_1x1::Validate(const Params& p, const optional_params& o) const { @@ -134,10 +134,10 @@ bool ConvolutionKernel_b_fs_yx_fsv16_1x1::Validate(const Params& p, const option } JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - auto blockWidth = runInfo.cldnnStyle.blockWidth; + auto blockWidth = dispatchData.cldnnStyle.blockWidth; if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); FusedOpsConfiguration conf_vec = { "_VEC", diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.h index ff547e4..e514751 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.h @@ -43,7 +43,7 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; struct AutoTuneOption { size_t blockWidth; 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.cpp index e2766a5..82a92fa 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.cpp @@ -70,28 +70,29 @@ bool ConvolutionKernel_b_fs_yx_fsv16_depthwise::Validate(const Params& p, const } ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv16_depthwise::SetDefault(const convolution_params& params, - int) const { - DispatchData runInfo = Parent::SetDefault(params); + int) const { + DispatchData dispatchData = Parent::SetDefault(params); const auto& out = params.output; - runInfo.gws0 = CeilDiv(out.X().v, x_block_size) * out.Y().v; - runInfo.gws1 = Align(out.Feature().v, feature_block_size); - runInfo.gws2 = out.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = sub_group_size; - runInfo.lws2 = 1; + dispatchData.gws[0] = CeilDiv(out.X().v, x_block_size) * out.Y().v; + dispatchData.gws[1] = Align(out.Feature().v, feature_block_size); + dispatchData.gws[2] = out.Batch().v; + + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; if (out.Batch().v == 1) - runInfo.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; else - runInfo.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_b_fs_yx_fsv16_depthwise::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = 
ConvolutionKernelBase::GetJitConstants(params, dispatchData); const size_t block_width = 8; @@ -129,7 +130,7 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_depthwise::GetJitConstants(const co } KernelsData ConvolutionKernel_b_fs_yx_fsv16_depthwise::GetKernelsData(const Params& params, - const optional_params& options) const { + const optional_params& options) const { return GetCommonKernelsData(params, options); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.h index 69a4073..d2d1b3d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_depthwise.h @@ -41,7 +41,7 @@ protected: } bool NeedPaddedInput() const override { return true; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp index 64144f2..148d91b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp @@ 
-76,11 +76,11 @@ ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetSupportedKey() const { } JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); - mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_SPATIAL", kd.cldnnStyle.blockWidth)); - mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_FEATURES", kd.cldnnStyle.blockHeight)); - mem_consts.AddConstant(MakeJitConstant("FEATURE_SLM_SPLIT", kd.cldnnStyle.prefetch)); + const DispatchData& dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); + mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_SPATIAL", dispatchData.cldnnStyle.blockWidth)); + mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_FEATURES", dispatchData.cldnnStyle.blockHeight)); + mem_consts.AddConstant(MakeJitConstant("FEATURE_SLM_SPLIT", dispatchData.cldnnStyle.prefetch)); mem_consts.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); mem_consts.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); @@ -106,27 +106,27 @@ JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetJitConstants(const co ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::SetDefault(const convolution_params& params, int index) const { - DispatchData kd; + DispatchData dispatchData; const auto& output = params.output; auto tune_params = GetAutoTuneParams(params, index); size_t k_slices = tune_params.feature_slm_split; - kd.gws0 = CeilDiv(output.X().v * output.Y().v, tune_params.out_block_spatial); - kd.gws1 = CeilDiv(output.Feature().v, tune_params.out_block_features * simd) * simd * k_slices; - kd.gws2 = output.Batch().v; + dispatchData.gws[0] = CeilDiv(output.X().v * output.Y().v, tune_params.out_block_spatial); + dispatchData.gws[1] = CeilDiv(output.Feature().v, tune_params.out_block_features * simd) * simd * k_slices; + 
dispatchData.gws[2] = output.Batch().v; - kd.lws0 = 1; - kd.lws1 = simd * k_slices; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = simd * k_slices; + dispatchData.lws[2] = 1; - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; + dispatchData.cldnnStyle = {0, 0, 0, 0, 0}; + dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0}; - kd.cldnnStyle.blockWidth = tune_params.out_block_spatial; - kd.cldnnStyle.blockHeight = tune_params.out_block_features; - kd.cldnnStyle.prefetch = k_slices; + dispatchData.cldnnStyle.blockWidth = tune_params.out_block_spatial; + dispatchData.cldnnStyle.blockHeight = tune_params.out_block_features; + dispatchData.cldnnStyle.prefetch = k_slices; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; auto in_f = params.weights.IFM().v; auto out_f = params.weights.OFM().v; @@ -158,14 +158,14 @@ ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::S general_is_faster |= in_f == 256 && out_f == 128 && out_x == 3 && out_y == 3 && batch == 1; if (general_is_faster && !x_strided) { - kd.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; } // Better to use kernel with 4 input features in a loop if (static_cast(params.weights.IFM().v) / static_cast(Align(params.weights.IFM().v, fsv)) < 0.5f) - kd.efficiency = FORCE_PRIORITY_4; + dispatchData.efficiency = FORCE_PRIORITY_4; - return kd; + return dispatchData; } // SetDefault bool Convolution_kernel_b_fs_yx_fsv16_imad_1x1::Validate(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h index 44f3f4a..90c5da2 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h @@ -35,7 +35,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp index 8b43b59..47f1fbe 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp @@ -47,20 +47,20 @@ ParamsKey ConvolutionKernel_b_fs_yx_fsv4_int8::GetSupportedKey() const { } ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_yx_fsv4_int8::SetDefault(const convolution_params& cp, int) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; if (cp.output.X().v > 512 && cp.filterSize.x == 5 && cp.filterSize.y == 5) - runInfo.efficiency = FORCE_PRIORITY_2; - runInfo.gws0 = CeilDiv(cp.output.X().v, 
sub_group_size) / 2; - runInfo.gws1 = cp.output.Y().v; - runInfo.gws2 = sub_group_size; + dispatchData.efficiency = FORCE_PRIORITY_2; + dispatchData.gws[0] = CeilDiv(cp.output.X().v, sub_group_size) / 2; + dispatchData.gws[1] = cp.output.Y().v; + dispatchData.gws[2] = sub_group_size; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = sub_group_size; - return runInfo; + return dispatchData; } bool ConvolutionKernel_b_fs_yx_fsv4_int8::Validate(const Params& p, const optional_params& o) const { @@ -85,10 +85,10 @@ bool ConvolutionKernel_b_fs_yx_fsv4_int8::Validate(const Params& p, const option return true; } -JitConstants ConvolutionKernel_b_fs_yx_fsv4_int8::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); +JitConstants ConvolutionKernel_b_fs_yx_fsv4_int8::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2])); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.h index 9cbc775..b4e8bbd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.h @@ -34,7 +34,7 @@ protected: return 
WeightsLayout::os_is_yx_osv16_isv4; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp index d3f3a47..4b8053e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp @@ -242,7 +242,7 @@ bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::ValidateAutoTuneParams(const c ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::DispatchData ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd; + DispatchData dispatchData; auto& out = params.output; auto tune_params = GetAutoTuneParams(params, autoTuneIndex); @@ -254,29 +254,21 @@ ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::SetDefault(const convolution_params fsv = 32; } - std::vector global = { + dispatchData.gws = { Align(CeilDiv(out.X().v, tune_params.tile_x), tune_params.lws0), - Align(out.Y().v, tune_params.lws1), + Align(out.Y().v, tune_params.lws1), CeilDiv(out.Feature().v, fsv) * tune_params.simd * out.Batch().v }; - std::vector local = { tune_params.lws0, tune_params.lws1, tune_params.simd }; + dispatchData.lws = { tune_params.lws0, tune_params.lws1, 
tune_params.simd }; - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.gemmStyle = { 0, 0, 0, 0, 0, 0 }; - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.cldnnStyle.blockWidth = tune_params.tile_x; + dispatchData.cldnnStyle.prefetch = tune_params.preload_input_slm; - kd.gemmStyle = { 0, 0, 0, 0, 0, 0 }; + dispatchData.efficiency = params.stride.x == 1 ? FORCE_PRIORITY_1 : FORCE_PRIORITY_2; - kd.cldnnStyle.blockWidth = tune_params.tile_x; - kd.cldnnStyle.prefetch = tune_params.preload_input_slm; - - kd.efficiency = params.stride.x == 1 ? FORCE_PRIORITY_1 : FORCE_PRIORITY_2; - - return kd; + return dispatchData; } bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::HasPaddedInput(const convolution_params& params) const { @@ -317,20 +309,20 @@ bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::ParamsHavePadding(const convol return needs_pad; } -JitConstants ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetJitConstants(const convolution_params& params, const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); +JitConstants ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); constexpr size_t imad_width = 4; auto filter_spatial = params.weights.X().v * params.weights.Y().v; auto filter_blocked = filter_spatial / imad_width * imad_width; - mem_consts.AddConstant(MakeJitConstant("LWS0", kd.lws0)); - mem_consts.AddConstant(MakeJitConstant("LWS1", kd.lws1)); - mem_consts.AddConstant(MakeJitConstant("SIMD", kd.lws2)); + mem_consts.AddConstant(MakeJitConstant("LWS0", dispatchData.lws[0])); + mem_consts.AddConstant(MakeJitConstant("LWS1", dispatchData.lws[1])); + mem_consts.AddConstant(MakeJitConstant("SIMD", dispatchData.lws[2])); - mem_consts.AddConstant(MakeJitConstant("TILE_X", kd.cldnnStyle.blockWidth)); + 
mem_consts.AddConstant(MakeJitConstant("TILE_X", dispatchData.cldnnStyle.blockWidth)); mem_consts.AddConstant(MakeJitConstant("FILTER_BLOCKED", filter_blocked)); - mem_consts.AddConstant(MakeJitConstant("PRELOAD_INPUT_TO_SLM", kd.cldnnStyle.prefetch)); + mem_consts.AddConstant(MakeJitConstant("PRELOAD_INPUT_TO_SLM", dispatchData.cldnnStyle.prefetch)); auto needs_boundary_check = ParamsHavePadding(params) && (!HasPaddedInput(params) || diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp index 31fe412..d191db2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp @@ -43,7 +43,7 @@ protected: bool NeedPaddedInput() const override { return false; } bool HasPaddedInput(const convolution_params& params) const; bool ParamsHavePadding(const convolution_params& params) const; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; struct AutoTuneParams { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp index 4011302..19d0398 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp @@ -104,7 +104,7 @@ ParamsKey ConvolutionKernel_b_fs_zyx_fsv16::GetSupportedKey() const { ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_zyx_fsv16::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params, autoTuneIndex); const auto& out = params.output; const auto& input = params.inputs[0]; @@ -130,36 +130,36 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_zyx_fsv16::SetDefault else break; } - kd.cldnnStyle.blockWidth = ow_block; + dispatchData.cldnnStyle.blockWidth = ow_block; if (out.GetDType() == Datatype::F16) { - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.gws0 = (f / 2); - kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z; - kd.gws2 = b % 2 == 0 ? b / 2 : b; // unroll mb by 2 + dispatchData.gws[0] = (f / 2); + dispatchData.gws[1] = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z; + dispatchData.gws[2] = b % 2 == 0 ? b / 2 : b; // unroll mb by 2 } else { - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; auto ocb = (f % 32 == 0) ? 32 : 16; - kd.gws0 = 16; - kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z; - kd.gws2 = b * f / ocb; + dispatchData.gws[0] = 16; + dispatchData.gws[1] = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z; + dispatchData.gws[2] = b * f / ocb; } } else if (ver_16mb16c) { f = (g > 1) ? 
f/g : Align(f, 16); - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.gws0 = f; - kd.gws1 = x * y * z; - kd.gws2 = (out.GetDType() == Datatype::F16) ? b / 32 : b / 16; + dispatchData.gws[0] = f; + dispatchData.gws[1] = x * y * z; + dispatchData.gws[2] = (out.GetDType() == Datatype::F16) ? b / 32 : b / 16; - kd.cldnnStyle.blockWidth = 1; + dispatchData.cldnnStyle.blockWidth = 1; } else { auto oh_block = 1; f = Align(f / g, 16); @@ -180,22 +180,22 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_b_fs_zyx_fsv16::SetDefault ocb /= 2; } - kd.cldnnStyle.blockWidth = ow_block; + dispatchData.cldnnStyle.blockWidth = ow_block; - kd.gws0 = ocb; - kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z; - kd.gws2 = b * (f / ocb) * g; + dispatchData.gws[0] = ocb; + dispatchData.gws[1] = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z; + dispatchData.gws[2] = b * (f / ocb) * g; - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } if (b == 1) - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; else - kd.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; - return kd; + return dispatchData; } bool ConvolutionKernel_b_fs_zyx_fsv16::Validate(const Params& p, const optional_params& o) const { @@ -231,10 +231,10 @@ bool ConvolutionKernel_b_fs_zyx_fsv16::Validate(const Params& p, const optional_ } JitConstants ConvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { auto input = params.inputs[0]; auto output = params.output; - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); const bool is_1stconv = input.Feature().v == 3 && input.GetLayout() == 
DataLayout::bfzyx; const bool ver_16mb16c = !is_1stconv && ((output.GetDType() == Datatype::F16 && output.Batch().v % 32 == 0) || @@ -253,9 +253,9 @@ JitConstants ConvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const convolution else jit.AddConstant(MakeJitConstant("CASE_3D", 1)); - jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0)); - jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1)); - jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2)); + jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0])); + jit.AddConstant(MakeJitConstant("LWS_1", dispatchData.lws[1])); + jit.AddConstant(MakeJitConstant("LWS_2", dispatchData.lws[2])); if (is_1stconv) { if (output.GetDType() == Datatype::F16) { @@ -267,11 +267,11 @@ JitConstants ConvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const convolution } else if (ver_16mb16c) { jit.AddConstant(MakeJitConstant("OCB", 1)); } else { - jit.AddConstant(MakeJitConstant("OCB", runInfo.gws0)); + jit.AddConstant(MakeJitConstant("OCB", dispatchData.gws[0])); } jit.AddConstant(MakeJitConstant("SUM_SCALE", 1)); - auto blockWidth = runInfo.cldnnStyle.blockWidth; + auto blockWidth = dispatchData.cldnnStyle.blockWidth; if (ver_16mb16c) { jit.AddConstant(MakeJitConstant("MB_BLOCK", 16)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.h index 19fa02c..cd94731 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.h @@ -55,7 +55,7 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants 
GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::ELTWISE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp index cfd6abd..82b1252 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp @@ -204,7 +204,7 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params& size_t in_block_depth = 1; bool break_external_loop = false; - + for (size_t d = 1; d < 16; ++d) { if (params.output.Z().v % d != 0) continue; @@ -283,7 +283,7 @@ float Convolution_kernel_b_fs_zyx_fsv16_imad::EstimateOccupancy(const convolutio } float Convolution_kernel_b_fs_zyx_fsv16_imad::EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const { - size_t slm_elements = block.output_block_width * block.output_block_height * block.output_block_depth * + size_t slm_elements = block.output_block_width * block.output_block_height * block.output_block_depth * block.output_block_features * (block.feature_slm_split - 1); size_t slm_bytes = slm_elements * BytesPerElement(GetAccumulatorType(params)); @@ -331,8 +331,8 @@ KernelsData Convolution_kernel_b_fs_zyx_fsv16_imad::GetKernelsData(const Params& } JitConstants Convolution_kernel_b_fs_zyx_fsv16_imad::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); + const DispatchData& 
dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); auto block_params = GetBlockParams(params); @@ -369,7 +369,7 @@ JitConstants Convolution_kernel_b_fs_zyx_fsv16_imad::GetJitConstants(const convo idx_order[idx_order.size() - 3] = "out_z"; } } - + if (block_params.output_block_height != 1) { loop_axes.push_back(Tensor::DataChannelName::Y); } else { @@ -392,28 +392,28 @@ JitConstants Convolution_kernel_b_fs_zyx_fsv16_imad::GetJitConstants(const convo } // GetJitConstants ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_zyx_fsv16_imad::SetDefault(const convolution_params& params, - int) const { - DispatchData kd; + int) const { + DispatchData dispatchData; const auto& output = params.output; const auto& weights = params.weights; auto block_params = GetBlockParams(params); - kd.gws0 = CeilDiv(output.X().v, block_params.output_block_width); - kd.gws1 = CeilDiv(output.Y().v, block_params.output_block_height) * CeilDiv(output.Z().v, block_params.output_block_depth); - kd.gws2 = output.Batch().v * CeilDiv(weights.OFM().v, block_params.output_block_features) * params.groups * simd * block_params.feature_slm_split; + dispatchData.gws[0] = CeilDiv(output.X().v, block_params.output_block_width); + dispatchData.gws[1] = CeilDiv(output.Y().v, block_params.output_block_height) * CeilDiv(output.Z().v, block_params.output_block_depth); + dispatchData.gws[2] = output.Batch().v * CeilDiv(weights.OFM().v, block_params.output_block_features) * params.groups * simd * block_params.feature_slm_split; - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = simd * block_params.feature_slm_split; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = simd * block_params.feature_slm_split; - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; + dispatchData.cldnnStyle = {0, 0, 0, 0, 0}; + dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0}; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; if 
(static_cast(params.weights.IFM().v) / static_cast(Align(params.weights.IFM().v, fsv)) < 0.5f) - kd.efficiency = FORCE_PRIORITY_4; + dispatchData.efficiency = FORCE_PRIORITY_4; - return kd; + return dispatchData; } // SetDefault bool Convolution_kernel_b_fs_zyx_fsv16_imad::Validate(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h index bdde4a5..35427cb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.h @@ -32,7 +32,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } WeightsLayout GetPreferredWeightsLayout(const convolution_params& p) const override { @@ -50,7 +50,7 @@ protected: size_t output_block_width; size_t output_block_height; size_t output_block_depth; - + size_t output_block_features; size_t input_block_width; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp index 11088e2..5386cc3 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp @@ -46,9 +46,9 @@ bool ConvolutionKernelBase::Validate(const Params& p, const optional_params& o) return true; } -JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& params, const DispatchData& kd) const { +JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params); - mem_consts.Merge(GetFusedPrimitivesJitConstants(params, kd)); + mem_consts.Merge(GetFusedPrimitivesJitConstants(params, dispatchData)); const auto& padding = params.padding; const auto& input = params.inputs[0]; @@ -101,12 +101,12 @@ JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& pa std::vector unrollLoopParams{params.filterSize.x, params.filterSize.y, - (uint32_t)kd.gemmStyle.globalWorkSizeDX, - (uint32_t)kd.gemmStyle.globalWorkSizeDY, - (uint32_t)kd.gemmStyle.globalWorkSizeDZ, - (uint32_t)kd.gemmStyle.subBlockDimM, - (uint32_t)kd.gemmStyle.subBlockDimK, - (uint32_t)kd.gemmStyle.subBlockDimN}; + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDX, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDY, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ, + (uint32_t)dispatchData.gemmStyle.subBlockDimM, + (uint32_t)dispatchData.gemmStyle.subBlockDimK, + (uint32_t)dispatchData.gemmStyle.subBlockDimN}; auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end()); @@ -116,13 +116,15 @@ JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& pa return mem_consts; } -bool ConvolutionKernelBase::CheckWorkGroups(const ConvolutionKernelBase::DispatchData& kd) { - if (kd.gws0 == 0 || kd.gws1 == 0 || kd.gws2 == 0 || kd.lws0 == 0 || kd.lws1 == 0 || 
kd.lws2 == 0) { +bool ConvolutionKernelBase::CheckWorkGroups(const ConvolutionKernelBase::DispatchData& dispatchData) { + if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3) return false; - } - if ((kd.gws0 % kd.lws0) != 0 || (kd.gws1 % kd.lws1) != 0 || (kd.gws2 % kd.lws2) != 0) { - return false; + for (size_t i = 0; i < dispatchData.gws.size(); i++) { + if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0) + return false; + if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0) + return false; } return true; @@ -164,43 +166,33 @@ bool ConvolutionKernelBase::CheckPitchForSplitOnly(const convolution_params& par } ConvolutionKernelBase::DispatchData ConvolutionKernelBase::SetDefault(const convolution_params& params, int) const { - DispatchData kd; + DispatchData dispatchData; const auto& out = params.output; - kd.fp16UnitUsed = out.GetDType() == Datatype::F16; - std::vector global; if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf) { - global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v}; + dispatchData.gws = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v}; } else if (params.output.GetLayout() == DataLayout::bfzyx) { - global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; + dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; } else { - global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v}; + dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v}; } - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - kd.cldnnStyle.blockWidth = 1; - kd.cldnnStyle.blockHeight = 1; - kd.cldnnStyle.prefetch = 0; - kd.cldnnStyle.inputBlockArraySize = 0; - kd.cldnnStyle.inputBlockWidth = 0; - - kd.gemmStyle.globalWorkSizeDX = 1; - 
kd.gemmStyle.globalWorkSizeDY = 1; - kd.gemmStyle.globalWorkSizeDZ = 1; - kd.gemmStyle.subBlockDimK = 1; - kd.gemmStyle.subBlockDimM = 0; - kd.gemmStyle.subBlockDimN = 0; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - return kd; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + dispatchData.cldnnStyle.blockWidth = 1; + dispatchData.cldnnStyle.blockHeight = 1; + dispatchData.cldnnStyle.prefetch = 0; + dispatchData.cldnnStyle.inputBlockArraySize = 0; + dispatchData.cldnnStyle.inputBlockWidth = 0; + + dispatchData.gemmStyle.globalWorkSizeDX = 1; + dispatchData.gemmStyle.globalWorkSizeDY = 1; + dispatchData.gemmStyle.globalWorkSizeDZ = 1; + dispatchData.gemmStyle.subBlockDimK = 1; + dispatchData.gemmStyle.subBlockDimM = 0; + dispatchData.gemmStyle.subBlockDimN = 0; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + return dispatchData; } KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params, @@ -232,21 +224,21 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params, if (kd.reorderInput && !options.allowInputReordering) return {}; } - DispatchData runInfo = SetDefault(newParams, autoTuneIndex); + DispatchData dispatchData = SetDefault(newParams, autoTuneIndex); - if (!CheckWorkGroups(runInfo)) { + if (!CheckWorkGroups(dispatchData)) { // Internal Error - wrong calculation of global/local work group sizes return {}; } auto finalKernelName = GetKernelName(newParams); - auto cldnnJit = GetJitConstants(newParams, runInfo); + auto cldnnJit = GetJitConstants(newParams, dispatchData); auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, finalKernelName, jit, @@ -276,7 +268,7 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params, } 
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; kd.autoTuneIndex = autoTuneIndex; return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h index d64f681..24bbbba 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h @@ -66,8 +66,8 @@ protected: virtual std::string GetKernelName(const convolution_params&) const { return kernelName; } virtual bool NeedPaddedInput() const { return false; } bool Validate(const Params& p, const optional_params& o) const override; - virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const; - virtual JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& kd) const; + virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const; + virtual JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& dispatchData) const; virtual DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const; static bool CheckWorkGroups(const DispatchData&); static bool CheckPitchForSplitOnly(const convolution_params& params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp index b016fe7..c7a0b9f 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp @@ -41,7 +41,7 @@ ParamsKey ConvolutionKernel_bfyx_1x1::GetSupportedKey() const { } ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_1x1::SetDefault(const convolution_params& params, int) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -50,17 +50,17 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_1x1::SetDefault(const auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = Align(x * y, 16) / 16; - kd.gws1 = Align(f, 16); - kd.gws2 = b; + dispatchData.gws[0] = Align(x * y, 16) / 16; + dispatchData.gws[1] = Align(f, 16); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = 16; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 16; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } bool ConvolutionKernel_bfyx_1x1::Validate(const Params& p, const optional_params& o) const { @@ -86,8 +86,8 @@ bool ConvolutionKernel_bfyx_1x1::Validate(const Params& p, const optional_params return true; } -JitConstants ConvolutionKernel_bfyx_1x1::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); +JitConstants ConvolutionKernel_bfyx_1x1::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); if (params.output.Feature().v % 16) jit.AddConstant(MakeJitConstant("LEFTOVERS", 1)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h index 62d5cb2..fb4d626 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h @@ -36,6 +36,6 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp index ac2ac40..c15ffcb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp @@ -32,7 +32,7 @@ ParamsKey ConvolutionKernel_bfyx_1x1_gemm_buf::GetSupportedKey() const { } ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_1x1_gemm_buf::SetDefault(const convolution_params& params, int) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -41,17 +41,17 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_1x1_gemm_buf::SetDefa auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = 
Align(f, 16); - kd.gws1 = CeilDiv(x * y, 16); - kd.gws2 = b; + dispatchData.gws[0] = Align(f, 16); + dispatchData.gws[1] = CeilDiv(x * y, 16); + dispatchData.gws[2] = b; - kd.lws0 = 16; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return kd; + return dispatchData; } bool ConvolutionKernel_bfyx_1x1_gemm_buf::Validate(const Params& p, const optional_params& o) const { @@ -75,8 +75,8 @@ bool ConvolutionKernel_bfyx_1x1_gemm_buf::Validate(const Params& p, const option return true; } -JitConstants ConvolutionKernel_bfyx_1x1_gemm_buf::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); +JitConstants ConvolutionKernel_bfyx_1x1_gemm_buf::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); const auto& out = params.output; const auto& input = params.inputs[0]; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h index 1b7b7bc..de75aca 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h @@ -36,6 +36,6 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const 
convolution_params& params, const DispatchData& dispatchData) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp index 2537828..d8850b9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp @@ -76,24 +76,24 @@ static block_params get_out_block_size(const convolution_params& p) { ConvolutionKernelBase::DispatchData convolution_kernel_bfyx_1x1_opt::SetDefault(const convolution_params& cp, int) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); constexpr size_t sub_group_size = 8; - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; auto block = get_out_block_size(cp); - runInfo.gws0 = cp.output.X().v / block.out_width; - runInfo.gws1 = cp.output.Y().v / block.out_height; - runInfo.gws2 = - 2 * (cp.output.Feature().v * cp.output.Batch().v) / block.out_depth; // process 8 output channels per Workitem + dispatchData.gws[0] = cp.output.X().v / block.out_width; + dispatchData.gws[1] = cp.output.Y().v / block.out_height; + // process 8 output channels per Workitem + dispatchData.gws[2] = 2 * (cp.output.Feature().v * cp.output.Batch().v) / block.out_depth; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 2 * sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 2 * sub_group_size; - return runInfo; + return dispatchData; } bool convolution_kernel_bfyx_1x1_opt::Validate(const Params& p, const 
optional_params& o) const { @@ -128,8 +128,8 @@ bool convolution_kernel_bfyx_1x1_opt::Validate(const Params& p, const optional_p } JitConstants convolution_kernel_bfyx_1x1_opt::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); auto block = get_out_block_size(params); jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h index ce8f0a0..9ce8b9e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h @@ -31,9 +31,9 @@ public: protected: WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp index 48d0cf2..45c57a8 
100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp @@ -71,9 +71,8 @@ bool ConvolutionKernel_bfyx_3x3_dw_opt::Validate(const Params& p, const optional return true; } -ConvolutionKernel_bfyx_3x3_dw_opt::AutoTuneOption ConvolutionKernel_bfyx_3x3_dw_opt::GetAutoTuneOptions( - const Params&, - int autoTuneIndex) const { +ConvolutionKernel_bfyx_3x3_dw_opt::AutoTuneOption ConvolutionKernel_bfyx_3x3_dw_opt::GetAutoTuneOptions(const Params&, + int autoTuneIndex) const { if ((autoTuneIndex >= 0) && (autoTuneIndex < static_cast(autoTuneOptions.size()))) { return autoTuneOptions[autoTuneIndex]; } @@ -87,7 +86,7 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_3x3_dw_opt::SetDefaul int autoTuneIndex) const { constexpr int simdSize = 16; - DispatchData runInfo = Parent::SetDefault(params); + DispatchData dispatchData = Parent::SetDefault(params); auto options = GetAutoTuneOptions(params, autoTuneIndex); @@ -96,28 +95,28 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_3x3_dw_opt::SetDefaul const int numTilesY = static_cast( std::ceil(static_cast(params.inputs[0].Y().v) / static_cast(options.tileDims.y))); - runInfo.cldnnStyle.blockWidth = options.tileDims.x; - runInfo.cldnnStyle.blockHeight = options.tileDims.y; - runInfo.gws0 = numTilesX * simdSize; - runInfo.gws1 = numTilesY; - runInfo.gws2 = params.inputs[0].Feature().v * params.inputs[0].Batch().v; - runInfo.lws0 = simdSize; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.cldnnStyle.blockWidth = options.tileDims.x; + dispatchData.cldnnStyle.blockHeight = options.tileDims.y; + dispatchData.gws[0] = numTilesX * simdSize; + dispatchData.gws[1] = numTilesY; + dispatchData.gws[2] = params.inputs[0].Feature().v * params.inputs[0].Batch().v; + dispatchData.lws[0] = simdSize; + 
dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - runInfo.efficiency = FORCE_PRIORITY_5; + dispatchData.efficiency = FORCE_PRIORITY_5; - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_bfyx_3x3_dw_opt::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - stSize tileDims = {kd.cldnnStyle.blockWidth, kd.cldnnStyle.blockHeight}; - auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + stSize tileDims = {dispatchData.cldnnStyle.blockWidth, dispatchData.cldnnStyle.blockHeight}; + auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData); if (tileDims.y != 0 && tileDims.x != 0) { - mem_consts.AddConstant(MakeJitConstant("UNIT_BYTE_SIZE", kd.fp16UnitUsed ? sizeof(short) : sizeof(float))); - mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", kd.lws0)); + mem_consts.AddConstant(MakeJitConstant("UNIT_BYTE_SIZE", BytesPerElement(params.output.GetDType()))); + mem_consts.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0])); mem_consts.AddConstant(MakeJitConstant("TILE_HEIGHT", tileDims.y)); mem_consts.AddConstant(MakeJitConstant("TILE_WIDTH", tileDims.x)); } @@ -132,9 +131,9 @@ KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetTunedKernelsDataByIndex(const KernelData kd = KernelData::Default(params); convolution_params& convParams = *static_cast(kd.params.get()); - DispatchData runInfo = SetDefault(convParams, autoTuneIndex); + DispatchData dispatchData = SetDefault(convParams, autoTuneIndex); - if (static_cast(static_cast(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize > + if (static_cast(static_cast(dispatchData.gws[0] - 1) / simdSize) * dispatchData.cldnnStyle.blockWidth + simdSize > convParams.inputs[0].Y().pitch) { // Internal Error - requested tile size is not supported for y pitch return {}; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h index 16c2735..c8e5285 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h @@ -38,7 +38,7 @@ protected: WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { return WeightsLayout::oiyx; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; struct AutoTuneOption { @@ -49,4 +49,4 @@ protected: AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const; std::vector autoTuneOptions = {}; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp index 8046070..4b3709f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp @@ -56,29 +56,22 @@ bool ConvolutionKernel_bfyx_depthwise_weights_lwg::Validate(const Params& p, con return true; } -ConvolutionKernelBase::DispatchData 
ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault( - const convolution_params& params, - int) const { - DispatchData runInfo = Parent::SetDefault(params); +ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault(const convolution_params& params, + int) const { + DispatchData dispatchData = Parent::SetDefault(params); const auto& out = params.output; - std::vector global = {out.X().v * out.Y().v, out.Feature().v, out.Batch().v}; + dispatchData.gws = { Align(out.X().v * out.Y().v, 16), out.Feature().v, out.Batch().v }; + dispatchData.lws = { 16, 1, 1 }; - runInfo.gws0 = Align(global[0], 16); - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - runInfo.lws0 = 16; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.efficiency = FORCE_PRIORITY_2; - runInfo.efficiency = FORCE_PRIORITY_2; - - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_bfyx_depthwise_weights_lwg::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData); if (params.padding.x != 0 || params.padding.y != 0) mem_consts.AddConstant(MakeJitConstant("BOUNDARY_CHECK", 1)); @@ -90,4 +83,4 @@ KernelsData ConvolutionKernel_bfyx_depthwise_weights_lwg::GetKernelsData(const P const optional_params& options) const { return GetTunedKernelsDataByIndex(params, options); } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h index 0aa4b4c..796d45f 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h @@ -34,7 +34,7 @@ protected: WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { return WeightsLayout::goiyx; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp index df57f4f..2c5849f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp @@ -38,16 +38,16 @@ ParamsKey ConvolutionKernel_bfyx_Direct_10_10_12::GetSupportedKey() const { } JitConstants ConvolutionKernel_bfyx_Direct_10_10_12::GetJitConstants(const convolution_params& cp, - const DispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(cp, runInfo); + const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(cp, dispatchData); jit.AddConstants({ - MakeJitConstant("ALIGNED_OFM", RoundUp(cp.output.Feature().v / cp.groups, runInfo.gemmStyle.subBlockDimN) * cp.groups), - MakeJitConstant("ALIGNED_OFM_PER_GROUP", 
RoundUp(cp.output.Feature().v / cp.groups, runInfo.gemmStyle.subBlockDimN)), - MakeJitConstant("DX", runInfo.gemmStyle.globalWorkSizeDX), - MakeJitConstant("DY", runInfo.gemmStyle.globalWorkSizeDY), + MakeJitConstant("ALIGNED_OFM", RoundUp(cp.output.Feature().v / cp.groups, dispatchData.gemmStyle.subBlockDimN) * cp.groups), + MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(cp.output.Feature().v / cp.groups, dispatchData.gemmStyle.subBlockDimN)), + MakeJitConstant("DX", dispatchData.gemmStyle.globalWorkSizeDX), + MakeJitConstant("DY", dispatchData.gemmStyle.globalWorkSizeDY), MakeJitConstant("KERNEL_SLICE_DIV2", (cp.filterSize.x * cp.filterSize.y) / 2), - MakeJitConstant("RIGHT_PARTIAL_TILE_K", cp.output.X().v % runInfo.gemmStyle.globalWorkSizeDX), + MakeJitConstant("RIGHT_PARTIAL_TILE_K", cp.output.X().v % dispatchData.gemmStyle.globalWorkSizeDX), MakeJitConstant("INPUT_BUFFER_WIDTH_PADDED", ""), // TODO: enable non padding path again MakeJitConstant("INPUT_BUFFER_HEIGHT_PADDED", ""), }); @@ -55,30 +55,29 @@ JitConstants ConvolutionKernel_bfyx_Direct_10_10_12::GetJitConstants(const convo return jit; } -ConvolutionKernel_bfyx_Direct_10_10_12::Parent::DispatchData ConvolutionKernel_bfyx_Direct_10_10_12::SetDefault( - const convolution_params& arg, - int) const { - Parent::DispatchData runInfo = Parent::SetDefault(arg); +ConvolutionKernel_bfyx_Direct_10_10_12::DispatchData ConvolutionKernel_bfyx_Direct_10_10_12::SetDefault(const convolution_params& arg, + int) const { + DispatchData dispatchData = Parent::SetDefault(arg); constexpr uint32_t TILE_N = 16; if (arg.filterSize.x == 5) { - runInfo.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 4, 1}; + dispatchData.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 4, 1}; } else { - runInfo.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 3, 1}; + dispatchData.gemmStyle = {1, 1, TILE_N, /*GWS DX*/ 4, /*GWS DY*/ 3, 1}; } - runInfo.gws0 = RoundUp(arg.output.X().v, runInfo.gemmStyle.globalWorkSizeDX) / 
runInfo.gemmStyle.globalWorkSizeDX; - runInfo.gws1 = RoundUp(arg.output.Y().v, runInfo.gemmStyle.globalWorkSizeDY) / runInfo.gemmStyle.globalWorkSizeDY; - runInfo.gws2 = RoundUp(arg.output.Feature().v / arg.groups, TILE_N) * arg.output.Batch().v * arg.groups; + dispatchData.gws[0] = RoundUp(arg.output.X().v, dispatchData.gemmStyle.globalWorkSizeDX) / dispatchData.gemmStyle.globalWorkSizeDX; + dispatchData.gws[1] = RoundUp(arg.output.Y().v, dispatchData.gemmStyle.globalWorkSizeDY) / dispatchData.gemmStyle.globalWorkSizeDY; + dispatchData.gws[2] = RoundUp(arg.output.Feature().v / arg.groups, TILE_N) * arg.output.Batch().v * arg.groups; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = TILE_N; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = TILE_N; - runInfo.efficiency = FORCE_PRIORITY_4; + dispatchData.efficiency = FORCE_PRIORITY_4; - return runInfo; + return dispatchData; } bool ConvolutionKernel_bfyx_Direct_10_10_12::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h index 1f5a0f4..f40dd23 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h @@ -34,9 +34,9 @@ protected: return (p.groups > 1) ? 
WeightsLayout::gi_yxs_os_yxsv2_osv16 : WeightsLayout::i_yxs_os_yxsv2_osv16; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp index f4fd240..97d7a37 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp @@ -49,21 +49,21 @@ std::string ConvolutionKernel_bfyx_GEMMLike::GetKernelName(const convolution_par } JitConstants ConvolutionKernel_bfyx_GEMMLike::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstants({ - MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(params.output.Feature().v / params.groups, runInfo.gemmStyle.subBlockDimN)), - MakeJitConstant("DX", runInfo.gemmStyle.globalWorkSizeDX), - MakeJitConstant("DY", runInfo.gemmStyle.globalWorkSizeDY), + MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(params.output.Feature().v / params.groups, dispatchData.gemmStyle.subBlockDimN)), + MakeJitConstant("DX", 
dispatchData.gemmStyle.globalWorkSizeDX), + MakeJitConstant("DY", dispatchData.gemmStyle.globalWorkSizeDY), MakeJitConstant("FILTER_SIZE_X_DIV2", params.filterSize.x / 2), MakeJitConstant("INPUT_BUFFER_WIDTH_PADDED", ""), // TODO: enable non padding path again MakeJitConstant("INPUT_BUFFER_HEIGHT_PADDED", ""), }); - if (CeilDiv(RoundUp(params.output.X().v * params.output.Y().v, runInfo.gemmStyle.subBlockDimM), - runInfo.gemmStyle.globalWorkSizeDY) % - runInfo.lws1 != + if (CeilDiv(RoundUp(params.output.X().v * params.output.Y().v, dispatchData.gemmStyle.subBlockDimM), + dispatchData.gemmStyle.globalWorkSizeDY) % + dispatchData.lws[1] != 0) jit.AddConstant(MakeJitConstant("LEFTOVERS", 1)); @@ -73,29 +73,29 @@ JitConstants ConvolutionKernel_bfyx_GEMMLike::GetJitConstants(const convolution_ ConvolutionKernel_bfyx_GEMMLike::Parent::DispatchData ConvolutionKernel_bfyx_GEMMLike::SetDefault( const convolution_params& arg, int autoTuneIndex) const { - DispatchData runInfo = Parent::SetDefault(arg, autoTuneIndex); + DispatchData dispatchData = Parent::SetDefault(arg, autoTuneIndex); - runInfo.lws0 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[2] = 1; if (arg.inputs[0].GetDType() == Datatype::F16) { - runInfo.gemmStyle = {1, arg.filterSize.x, 32, 32, 1, 1}; - runInfo.lws1 = 16; - runInfo.efficiency = FORCE_PRIORITY_6; + dispatchData.gemmStyle = {1, arg.filterSize.x, 32, 32, 1, 1}; + dispatchData.lws[1] = 16; + dispatchData.efficiency = FORCE_PRIORITY_6; } else { - runInfo.gemmStyle = {2, arg.filterSize.x, 32, 32, 2, 1}; - runInfo.lws1 = 8; - runInfo.efficiency = FORCE_PRIORITY_8; + dispatchData.gemmStyle = {2, arg.filterSize.x, 32, 32, 2, 1}; + dispatchData.lws[1] = 8; + dispatchData.efficiency = FORCE_PRIORITY_8; } - size_t sgemm_m = RoundUp(arg.output.X().v * arg.output.Y().v, runInfo.gemmStyle.subBlockDimM); - size_t sgemm_n = RoundUp(arg.output.Feature().v / arg.groups, runInfo.gemmStyle.subBlockDimN); + size_t sgemm_m = RoundUp(arg.output.X().v 
* arg.output.Y().v, dispatchData.gemmStyle.subBlockDimM); + size_t sgemm_n = RoundUp(arg.output.Feature().v / arg.groups, dispatchData.gemmStyle.subBlockDimN); - runInfo.gws0 = RoundUp(CeilDiv(sgemm_n, runInfo.gemmStyle.globalWorkSizeDX), runInfo.lws0); - runInfo.gws1 = RoundUp(CeilDiv(sgemm_m, runInfo.gemmStyle.globalWorkSizeDY), runInfo.lws1); - runInfo.gws2 = arg.output.Batch().v * arg.groups; + dispatchData.gws[0] = RoundUp(CeilDiv(sgemm_n, dispatchData.gemmStyle.globalWorkSizeDX), dispatchData.lws[0]); + dispatchData.gws[1] = RoundUp(CeilDiv(sgemm_m, dispatchData.gemmStyle.globalWorkSizeDY), dispatchData.lws[1]); + dispatchData.gws[2] = arg.output.Batch().v * arg.groups; - return runInfo; + return dispatchData; } bool ConvolutionKernel_bfyx_GEMMLike::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h index fdf6527..e11336c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h @@ -34,8 +34,8 @@ protected: WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override; std::string GetKernelName(const convolution_params& params) const override; bool NeedPaddedInput() const override { return true; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // 
namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp index 94cb32e..415fe3f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp @@ -39,19 +39,19 @@ ParamsKey ConvolutionKernel_bfyx_iyxo::GetSupportedKey() const { } ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_iyxo::SetDefault(const convolution_params& cp, int) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; - runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 4; - runInfo.gws1 = cp.output.Y().v; - runInfo.gws2 = sub_group_size; + dispatchData.gws[0] = CeilDiv(cp.output.X().v, sub_group_size) / 4; + dispatchData.gws[1] = cp.output.Y().v; + dispatchData.gws[2] = sub_group_size; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = sub_group_size; - return runInfo; + return dispatchData; } bool ConvolutionKernel_bfyx_iyxo::Validate(const Params& p, const optional_params& o) const { @@ -76,10 +76,10 @@ bool ConvolutionKernel_bfyx_iyxo::Validate(const Params& p, const optional_param return true; } -JitConstants ConvolutionKernel_bfyx_iyxo::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); +JitConstants ConvolutionKernel_bfyx_iyxo::GetJitConstants(const convolution_params& 
params, const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2])); return jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h index ec82082..2c574ed 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h @@ -34,7 +34,7 @@ protected: return WeightsLayout::iyxo; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp index eb641bc..fce3e6a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp @@ -147,7 +147,6 @@ ConvolutionKernel_bfyx_os_iyx_osv16::AutoTuneOption ConvolutionKernel_bfyx_os_iy option.blockWidth = 4; option.blockHeight = 3; option.prefetch = 5; - 
// run_info.efficiency = FORCE_PRIORITY_7; // GEMM is better } // if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block @@ -161,39 +160,39 @@ ConvolutionKernel_bfyx_os_iyx_osv16::AutoTuneOption ConvolutionKernel_bfyx_os_iy ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_os_iyx_osv16::SetDefault(const convolution_params& cp, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); const auto of_maps = cp.output.Feature().v; const auto of_maps_per_group = of_maps / cp.groups; const size_t of_threads_per_batch = RoundUp(of_maps_per_group, sub_group_size) * cp.groups; - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex); - runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth; - runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight; - runInfo.cldnnStyle.prefetch = tuneOptions.prefetch; + dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth; + dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight; + dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch; - auto input_block_dims = get_bfyx_req_input_block_dims(runInfo.cldnnStyle.blockWidth, - runInfo.cldnnStyle.blockHeight, + auto input_block_dims = get_bfyx_req_input_block_dims(dispatchData.cldnnStyle.blockWidth, + dispatchData.cldnnStyle.blockHeight, cp.filterSize, cp.stride, cp.dilation, sub_group_size, - runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2, + cp.output.GetDType() == Datatype::F16 ? 
sub_group_size : sub_group_size / 2, sub_group_size); - runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first; - runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second; + dispatchData.cldnnStyle.inputBlockArraySize = input_block_dims.first; + dispatchData.cldnnStyle.inputBlockWidth = input_block_dims.second; - runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth); - runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight); - runInfo.gws2 = of_threads_per_batch * cp.output.Batch().v; + dispatchData.gws[0] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth); + dispatchData.gws[1] = CeilDiv(cp.output.Y().v, dispatchData.cldnnStyle.blockHeight); + dispatchData.gws[2] = of_threads_per_batch * cp.output.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = sub_group_size; - return runInfo; + return dispatchData; } bool ConvolutionKernel_bfyx_os_iyx_osv16::Validate(const Params& p, const optional_params& o) const { @@ -205,13 +204,13 @@ bool ConvolutionKernel_bfyx_os_iyx_osv16::Validate(const Params& p, const option } JitConstants ConvolutionKernel_bfyx_os_iyx_osv16::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { const auto of_maps = params.output.Feature().v; const auto of_maps_per_group = of_maps / params.groups; const size_t of_threads_per_batch = RoundUp(of_maps_per_group, sub_group_size); size_t leftovers = of_threads_per_batch - of_maps_per_group; - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); @@ -220,12 +219,12 @@ JitConstants ConvolutionKernel_bfyx_os_iyx_osv16::GetJitConstants(const convolut } - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2)); - 
jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight)); - jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize)); - jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth)); - jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2])); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight)); + jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", dispatchData.cldnnStyle.inputBlockArraySize)); + jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth)); + jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch)); if (leftovers) { jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h index 536970b..72706b0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h @@ -40,7 +40,7 @@ protected: FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { 
return true; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp index 152fb25..f515fa3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp @@ -146,7 +146,6 @@ ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::AutoTuneOption ConvolutionKernel_bfyx_ option.blockWidth = 4; option.blockHeight = 3; option.prefetch = 5; - // run_info.efficiency = FORCE_PRIORITY_7; // GEMM is better } // if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block @@ -160,38 +159,38 @@ ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::AutoTuneOption ConvolutionKernel_bfyx_ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::SetDefault(const convolution_params& cp, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); const auto of_maps = cp.output.Feature().v; const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size); - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex); - runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth; - runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight; - runInfo.cldnnStyle.prefetch = tuneOptions.prefetch; + dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth; + dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight; + 
dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch; - auto input_block_dims = get_bfyx_req_input_block_dims(runInfo.cldnnStyle.blockWidth, - runInfo.cldnnStyle.blockHeight, + auto input_block_dims = get_bfyx_req_input_block_dims(dispatchData.cldnnStyle.blockWidth, + dispatchData.cldnnStyle.blockHeight, cp.filterSize, cp.stride, cp.dilation, sub_group_size, - runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2, + cp.output.GetDType() == Datatype::F16 ? sub_group_size : sub_group_size / 2, sub_group_size); - runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first; - runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second; + dispatchData.cldnnStyle.inputBlockArraySize = input_block_dims.first; + dispatchData.cldnnStyle.inputBlockWidth = input_block_dims.second; - runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth); - runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight); - runInfo.gws2 = 2 * of_threads_per_batch * cp.output.Batch().v; + dispatchData.gws[0] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth); + dispatchData.gws[1] = CeilDiv(cp.output.Y().v, dispatchData.cldnnStyle.blockHeight); + dispatchData.gws[2] = 2 * of_threads_per_batch * cp.output.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 2 * sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 2 * sub_group_size; - return runInfo; + return dispatchData; } bool ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::Validate(const Params& p, const optional_params& o) const { @@ -211,19 +210,19 @@ bool ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::Validate(const Params& p, const o } JitConstants ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { const auto of_maps = params.output.Feature().v; const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size); size_t 
leftovers = of_threads_per_batch - of_maps; - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 16)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight)); - jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize)); - jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth)); - jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight)); + jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", dispatchData.cldnnStyle.inputBlockArraySize)); + jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth)); + jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch)); if (leftovers) { jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h index b3bcf6d..75e8c3b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h @@ -33,7 +33,7 @@ public: protected: WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + 
JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; @@ -50,4 +50,4 @@ private: std::vector autoTuneOptions = {}; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.cpp index 5df33de..a553b67 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.cpp @@ -72,32 +72,32 @@ ParamsKey ConvolutionKernel_bfyx_to_bfyx_f16::GetSupportedKey() const { ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_to_bfyx_f16::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params); const auto& out = params.output; auto autoTune = GetAutoTuneOptions(params, autoTuneIndex); - kd.cldnnStyle.blockWidth = autoTune.blockWidth; + dispatchData.cldnnStyle.blockWidth = autoTune.blockWidth; auto x = out.X().v; auto y = out.Y().v; auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = CeilDiv(x, autoTune.blockWidth) * y; - kd.gws1 = Align(f, sub_group_size); - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x, autoTune.blockWidth) * y; + dispatchData.gws[1] = Align(f, sub_group_size); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = 
sub_group_size; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; if (b == 1) - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; else - kd.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; - return kd; + return dispatchData; } bool ConvolutionKernel_bfyx_to_bfyx_f16::Validate(const Params& p, const optional_params& o) const { @@ -124,12 +124,12 @@ bool ConvolutionKernel_bfyx_to_bfyx_f16::Validate(const Params& p, const optiona } JitConstants ConvolutionKernel_bfyx_to_bfyx_f16::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { auto input = params.inputs[0]; auto output = params.output; - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); - auto blockWidth = runInfo.cldnnStyle.blockWidth; + auto blockWidth = dispatchData.cldnnStyle.blockWidth; if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.h index adf4d75..a1edefc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_b_fs_yx_fsv16.h @@ -48,7 +48,7 @@ protected: bool NeedPaddedInput() const override { return false; } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; 
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; struct AutoTuneOption { size_t blockWidth; std::string exeMode; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.cpp index 2ec407a..e2f73f0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.cpp @@ -51,12 +51,12 @@ ParamsKey ConvolutionKernel_bfyx_to_bfyx_bsv16_fsv16::GetSupportedKey() const { } ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_to_bfyx_bsv16_fsv16::SetDefault(const convolution_params& params, - int autoTuneIndex) const { - DispatchData kd = ConvolutionKernel_bfyx_to_bfyx_f16::SetDefault(params, autoTuneIndex); + int autoTuneIndex) const { + DispatchData dispatchData = ConvolutionKernel_bfyx_to_bfyx_f16::SetDefault(params, autoTuneIndex); - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } bool ConvolutionKernel_bfyx_to_bfyx_bsv16_fsv16::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.cpp index 69d9c5c..b4db077 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.cpp @@ -73,24 +73,24 @@ ConvolutionKernel_bfyx_to_fs_byx_fsv32::AutoTuneOption ConvolutionKernel_bfyx_to ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_to_fs_byx_fsv32::SetDefault(const convolution_params& arg, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg); AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex); - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; - runInfo.cldnnStyle.blockHeight = option.blockHeight; - runInfo.cldnnStyle.blockWidth = option.blockWidth; + dispatchData.cldnnStyle.blockHeight = option.blockHeight; + dispatchData.cldnnStyle.blockWidth = option.blockWidth; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 16; - runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth); - runInfo.gws1 = CeilDiv(arg.output.Y().v, option.blockHeight); - runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; + dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth); + dispatchData.gws[1] = CeilDiv(arg.output.Y().v, option.blockHeight); + dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; - return runInfo; + return dispatchData; } bool ConvolutionKernel_bfyx_to_fs_byx_fsv32::Validate(const Params& p, const optional_params& o) const { @@ -107,16 +107,16 @@ bool ConvolutionKernel_bfyx_to_fs_byx_fsv32::Validate(const Params& p, const opt } JitConstants ConvolutionKernel_bfyx_to_fs_byx_fsv32::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = 
ConvolutionKernelBase::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", kd.cldnnStyle.blockHeight)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight)); auto inputBlockWidth = - getInputSize(params.stride.x, params.filterSize.x, params.dilation.x, kd.cldnnStyle.blockWidth); + getInputSize(params.stride.x, params.filterSize.x, params.dilation.x, dispatchData.cldnnStyle.blockWidth); auto inputBlockHeight = - getInputSize(params.stride.y, params.filterSize.y, params.dilation.y, kd.cldnnStyle.blockHeight); + getInputSize(params.stride.y, params.filterSize.y, params.dilation.y, dispatchData.cldnnStyle.blockHeight); auto inputBlockWidthRound = RoundUp(inputBlockWidth, subGroupSize); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.h index 4310347..4298f88 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_fs_byx_fsv32.h @@ -44,7 +44,7 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.cpp index 9ae158c..8793243 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.cpp @@ -111,25 +111,25 @@ ConvolutionKernel_fs_byx_fsv32::AutoTuneOption ConvolutionKernel_fs_byx_fsv32::G ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32::SetDefault(const convolution_params& arg, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg); AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex); - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; - runInfo.cldnnStyle.blockHeight = 1; - runInfo.cldnnStyle.blockWidth = option.blockWidth; - runInfo.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth); + dispatchData.cldnnStyle.blockHeight = 1; + dispatchData.cldnnStyle.blockWidth = option.blockWidth; + dispatchData.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth); - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 16; - runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth); - runInfo.gws1 = arg.output.Y().v; - runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; + dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth); + dispatchData.gws[1] = arg.output.Y().v; + dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; - return runInfo; + return dispatchData; } bool 
ConvolutionKernel_fs_byx_fsv32::Validate(const Params& p, const optional_params& o) const { @@ -150,15 +150,15 @@ bool ConvolutionKernel_fs_byx_fsv32::Validate(const Params& p, const optional_pa } JitConstants ConvolutionKernel_fs_byx_fsv32::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData); auto accumulator_type = GetAccumulatorType(params); auto activation_type = GetAccumulatorType(params); jit.Merge(MakeTypeJitConstants(accumulator_type, "ACCUMULATOR")); jit.Merge(MakeTypeJitConstants(activation_type, "ACTIVATION")); - jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", kd.cldnnStyle.inputBlockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); jit.AddConstant(MakeJitConstant("FSV", fsv)); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize)); jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.h index fd3f668..a873772 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32.h @@ -44,7 +44,7 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) 
const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.cpp index 5533baa..49e3c70 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.cpp @@ -109,24 +109,24 @@ ConvolutionKernel_fs_byx_fsv32_1x1::AutoTuneOption ConvolutionKernel_fs_byx_fsv3 ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32_1x1::SetDefault(const convolution_params& arg, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg); AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex); - runInfo.efficiency = FORCE_PRIORITY_4; + dispatchData.efficiency = FORCE_PRIORITY_4; - runInfo.cldnnStyle.blockHeight = option.blockHeight; - runInfo.cldnnStyle.blockWidth = option.blockWidth; + dispatchData.cldnnStyle.blockHeight = option.blockHeight; + dispatchData.cldnnStyle.blockWidth = option.blockWidth; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 16; - runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth); - runInfo.gws1 = CeilDiv(arg.output.Y().v, option.blockHeight); - runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; + dispatchData.gws[0] = CeilDiv(arg.output.X().v, 
option.blockWidth); + dispatchData.gws[1] = CeilDiv(arg.output.Y().v, option.blockHeight); + dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; - return runInfo; + return dispatchData; } bool ConvolutionKernel_fs_byx_fsv32_1x1::Validate(const Params& p, const optional_params& o) const { @@ -150,11 +150,11 @@ bool ConvolutionKernel_fs_byx_fsv32_1x1::Validate(const Params& p, const optiona } JitConstants ConvolutionKernel_fs_byx_fsv32_1x1::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", kd.cldnnStyle.blockHeight)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight)); jit.AddConstant(MakeJitConstant("FSV", fsv)); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize)); jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.h index 5e822e2..cda0f51 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_1x1.h @@ -45,7 +45,7 @@ protected: bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const 
DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp index cbb3999..37fc5c4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp @@ -109,26 +109,26 @@ ConvolutionKernel_fs_byx_fsv32_depthwise::AutoTuneOption ConvolutionKernel_fs_by } ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32_depthwise::SetDefault(const convolution_params& arg, - int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + int autoTuneIndex) const { + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg); AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex); - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; - runInfo.cldnnStyle.blockHeight = 1; - runInfo.cldnnStyle.blockWidth = option.blockWidth; - runInfo.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth); + dispatchData.cldnnStyle.blockHeight = 1; + dispatchData.cldnnStyle.blockWidth = option.blockWidth; + dispatchData.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth); - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 16; - runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth); - 
runInfo.gws1 = arg.output.Y().v; - runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; + dispatchData.gws[0] = CeilDiv(arg.output.X().v, option.blockWidth); + dispatchData.gws[1] = arg.output.Y().v; + dispatchData.gws[2] = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v; - return runInfo; + return dispatchData; } bool ConvolutionKernel_fs_byx_fsv32_depthwise::Validate(const Params& p, const optional_params& o) const { @@ -154,11 +154,11 @@ bool ConvolutionKernel_fs_byx_fsv32_depthwise::Validate(const Params& p, const o } JitConstants ConvolutionKernel_fs_byx_fsv32_depthwise::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", kd.cldnnStyle.inputBlockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); jit.AddConstant(MakeJitConstant("FSV", fsv)); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize)); jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread)); @@ -178,8 +178,8 @@ JitConstants ConvolutionKernel_fs_byx_fsv32_depthwise::GetJitConstants(const con } KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetTunedKernelsDataByIndex(const Params& params, - const optional_params& options, - const int autoTuneIndex) const { + const optional_params& options, + const int autoTuneIndex) const { auto tuneOptions = GetAutoTuneOptions(params, autoTuneIndex); return GetCommonKernelsData(params, options, tuneOptions.exeMode, autoTuneIndex); } @@ -189,7 +189,7 @@ KernelsData 
ConvolutionKernel_fs_byx_fsv32_depthwise::GetKernelsData(const Param } KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetKernelsDataForAutoTune(const Params& params, - const optional_params& options) const { + const optional_params& options) const { if (!Validate(params, options)) { return {}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h index 2d563f2..08914ec 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h @@ -44,7 +44,7 @@ protected: } bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp index bb4158f..8283fc5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp @@ -103,8 +103,8 @@ KernelsData ConvolutionKernel_imad::GetKernelsData(const Params& params, const o return GetCommonKernelsData(params, options); } -JitConstants 
ConvolutionKernel_imad::GetJitConstants(const convolution_params& params, const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); +JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); const auto& input = params.inputs[0]; const auto& output = params.output; @@ -150,7 +150,7 @@ JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& p ConvolutionKernelBase::DispatchData ConvolutionKernel_imad::SetDefault(const convolution_params& params, int) const { - DispatchData kd; + DispatchData dispatchData; const auto& output = params.output; const auto& weights = params.weights; @@ -158,34 +158,26 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_imad::SetDefault(const con size_t otw, oth; getOutBlock_WH(output.X().v, params.stride.x, weights.X().v, params.dilation.x, otw, oth); - std::vector global = {// number of tiles needed to cover output width - CeilDiv(output.X().v, otw), + dispatchData.gws = { // number of tiles needed to cover output width + CeilDiv(output.X().v, otw), - // number of tiles needed to cover output height - CeilDiv(output.Y().v, oth), + // number of tiles needed to cover output height + CeilDiv(output.Y().v, oth), - // round depth range up - Align(weights.OFM().v, SIMD_SIZE) * params.groups * output.Batch().v}; + // round depth range up + Align(weights.OFM().v, SIMD_SIZE) * params.groups * output.Batch().v }; - std::vector local = {1, 1, SIMD_SIZE}; + dispatchData.lws = {1, 1, SIMD_SIZE}; - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; + dispatchData.cldnnStyle = {0, 0, 0, 0, 0}; + dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0}; // This kernel is quite slow for 1x1 and KHx1 kernels // TODO: 
check if we need any optimized kernels in this layout // If yes, we need to implement some customization for these cases. - kd.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; - return kd; + return dispatchData; } // SetDefault bool ConvolutionKernel_imad::Validate(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.h index 37378c7..c20058d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.h @@ -32,7 +32,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } WeightsLayout GetPreferredWeightsLayout(const convolution_params &p) const override { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp index adc69bf..2b9c8fb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp @@ -120,7 
+120,7 @@ bool ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::ValidateAutoTuneParams(const convo } ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::AutoTuneParams ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetAutoTuneParams(const convolution_params& params, - int index) const { + int index) const { AutoTuneParams tune_params; bool selected = false; if (index >= 0 && index < static_cast(all_tune_params.size())) { @@ -143,13 +143,13 @@ ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::AutoTuneParams ConvolutionKernel_imad_b } JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); - auto simd = kd.lws0; - auto features_per_wi = kd.cldnnStyle.blockHeight; - auto lwg_depth = kd.lws2; - auto force_prefetch = kd.cldnnStyle.prefetch == 1; + auto simd = dispatchData.lws[0]; + auto features_per_wi = dispatchData.cldnnStyle.blockHeight; + auto lwg_depth = dispatchData.lws[2]; + auto force_prefetch = dispatchData.cldnnStyle.prefetch == 1; mem_consts.AddConstant(MakeJitConstant("SIMD", simd)); mem_consts.AddConstant(MakeJitConstant("FEATURES_PER_WI", features_per_wi)); @@ -175,8 +175,8 @@ JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetJitConstants(const conv } // GetJitConstants ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::SetDefault(const convolution_params& params, - int autoTuneIndex) const { - DispatchData kd; + int autoTuneIndex) const { + DispatchData dispatchData; auto& out = params.output; auto autoTuneParam = GetAutoTuneParams(params, autoTuneIndex); @@ -184,31 +184,23 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::Set auto simd = autoTuneParam.simd; auto features_per_wi = autoTuneParam.features_per_wi; - std::vector global = { RoundUp(out.X().v * out.Y().v, simd), 
CeilDiv(out.Feature().v, features_per_wi), out.Batch().v * lwg_depth }; - std::vector local = { simd, 1, lwg_depth}; + dispatchData.gws = { RoundUp(out.X().v * out.Y().v, simd), CeilDiv(out.Feature().v, features_per_wi), out.Batch().v * lwg_depth }; + dispatchData.lws = { simd, 1, lwg_depth}; - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.gemmStyle = { 0, 0, 0, 0, 0, 0 }; - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.cldnnStyle.blockHeight = features_per_wi; + dispatchData.cldnnStyle.blockWidth = simd; + dispatchData.cldnnStyle.prefetch = autoTuneParam.force_prefetch ? 1 : 0; - kd.gemmStyle = { 0, 0, 0, 0, 0, 0 }; + dispatchData.efficiency = FORCE_PRIORITY_1; - kd.cldnnStyle.blockHeight = features_per_wi; - kd.cldnnStyle.blockWidth = simd; - kd.cldnnStyle.prefetch = autoTuneParam.force_prefetch ? 1 : 0; - - kd.efficiency = FORCE_PRIORITY_1; - - return kd; + return dispatchData; } // SetDefault KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetTunedKernelsDataByIndex(const Params& params, - const optional_params& options, - int autoTuneIndex) const { + const optional_params& options, + int autoTuneIndex) const { auto convParams = static_cast(params); auto tuneParams = GetAutoTuneParams(convParams, autoTuneIndex); return GetCommonKernelsData(params, options, tuneParams.exeMode, autoTuneIndex); @@ -219,7 +211,7 @@ KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetKernelsData(const Params } KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetKernelsDataForAutoTune(const Params& params, - const optional_params& options) const { + const optional_params& options) const { if (!Validate(params, options)) { return {}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.h index 
4d66e46..6cf7e8d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.h @@ -32,7 +32,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp index 43b83e2..1e1a229 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp @@ -273,8 +273,8 @@ ConvolutionKernel_imad_b_fs_yx_fsv4_dw::AutoTuneParams ConvolutionKernel_imad_b_ } JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); size_t filter_block_size = 4; size_t min_blocked_leftovers = 4; @@ -288,7 +288,7 @@ JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetJitConstants(const convo } 
mem_consts.AddConstant(MakeJitConstant("FILTER_BLOCKED", filter_blocked)); - auto& work_mode = kd.cldnnStyle.prefetch; + auto& work_mode = dispatchData.cldnnStyle.prefetch; bool tiled = (work_mode & mode::tiled) != 0; bool preload_input = (work_mode & mode::preload_input) != 0; bool preload_weights = (work_mode & mode::preload_weights) != 0; @@ -300,21 +300,21 @@ JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetJitConstants(const convo if (tiled) { preload_weights = true; - simd = kd.lws0; - tile_x = kd.cldnnStyle.blockWidth; - tile_y = kd.cldnnStyle.blockHeight; + simd = dispatchData.lws[0]; + tile_x = dispatchData.cldnnStyle.blockWidth; + tile_y = dispatchData.cldnnStyle.blockHeight; input_line_size = 1; output_block_x = 1; } else if (preload_input) { tile_x = 1; - tile_y = kd.cldnnStyle.blockHeight; - output_block_x = kd.cldnnStyle.blockWidth; + tile_y = dispatchData.cldnnStyle.blockHeight; + output_block_x = dispatchData.cldnnStyle.blockWidth; input_line_size = (output_block_x - 1) * params.stride.x + (params.weights.X().v - 1) * params.dilation.x + 1; } else { tile_x = 1; tile_y = 1; input_line_size = 1; - output_block_x = kd.cldnnStyle.blockWidth; + output_block_x = dispatchData.cldnnStyle.blockWidth; } mem_consts.AddConstant(MakeJitConstant("TILED", tiled)); @@ -345,7 +345,7 @@ JitConstants ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetJitConstants(const convo ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd; + DispatchData dispatchData; auto& out = params.output; auto autoTuneParam = GetAutoTuneParams(params, autoTuneIndex); @@ -357,34 +357,26 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::SetD global_x = global_x * autoTuneParam.tiled_simd; } - std::vector global = { global_x, global_y, CeilDiv(out.Feature().v, fsv) * out.Batch().v }; - std::vector local = { 1, 1, 1 }; + dispatchData.gws = { global_x, 
global_y, CeilDiv(out.Feature().v, fsv) * out.Batch().v }; + dispatchData.lws = { 1, 1, 1 }; if (autoTuneParam.tiled) { - local[0] = autoTuneParam.tiled_simd; + dispatchData.lws[0] = autoTuneParam.tiled_simd; } else { - local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); } - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.gemmStyle = { 0, 0, 0, 0, 0, 0 }; - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.cldnnStyle.blockWidth = autoTuneParam.block_x; + dispatchData.cldnnStyle.blockHeight = autoTuneParam.block_y; + dispatchData.cldnnStyle.prefetch = (static_cast(autoTuneParam.tiled) * mode::tiled) + | (static_cast(autoTuneParam.preload_input) * mode::preload_input) + | (static_cast(autoTuneParam.preload_weights) * mode::preload_weights); - kd.gemmStyle = { 0, 0, 0, 0, 0, 0 }; + dispatchData.efficiency = FORCE_PRIORITY_1; - kd.cldnnStyle.blockWidth = autoTuneParam.block_x; - kd.cldnnStyle.blockHeight = autoTuneParam.block_y; - kd.cldnnStyle.prefetch = (static_cast(autoTuneParam.tiled) * mode::tiled) - | (static_cast(autoTuneParam.preload_input) * mode::preload_input) - | (static_cast(autoTuneParam.preload_weights) * mode::preload_weights); - - kd.efficiency = FORCE_PRIORITY_1; - - return kd; + return dispatchData; } // SetDefault KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetTunedKernelsDataByIndex(const Params& params, @@ -400,7 +392,7 @@ KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetKernelsData(const Params& } KernelsData ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetKernelsDataForAutoTune(const Params& params, - const optional_params& options) const { + const optional_params& options) const { if (!Validate(params, options)) { return {}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.hpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.hpp index 8021e7d..06a21a5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.hpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.hpp @@ -32,7 +32,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return false; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp index 4205064..4a80120 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp @@ -57,8 +57,8 @@ KernelsData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetKernelsData(con return GetCommonKernelsData(params, options); } -JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetJitConstants(const convolution_params& params, const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); +JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetJitConstants(const convolution_params& params, const DispatchData& 
dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); FusedOpsConfiguration conf_scalar = {"", @@ -77,26 +77,18 @@ JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetJitConstants(c } // GetJitConstants ConvolutionKernelBase::DispatchData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::SetDefault(const convolution_params& params, int) const { - DispatchData kd; + DispatchData dispatchData; const auto& output = params.output; - std::vector global = {output.X().v, output.Y().v, output.Feature().v / 32 * output.Batch().v}; - std::vector local = {1, 1, SIMD_SIZE}; + dispatchData.gws = { output.X().v, output.Y().v, output.Feature().v / 32 * output.Batch().v }; + dispatchData.lws = { 1, 1, SIMD_SIZE}; - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.cldnnStyle = {0, 0, 0, 0, 0}; + dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0}; - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.efficiency = FORCE_PRIORITY_2; - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; - - kd.efficiency = FORCE_PRIORITY_2; - - return kd; + return dispatchData; } // SetDefault bool Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::Validate(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.h index 2091473..8f26280 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.h @@ -32,7 +32,7 
@@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp index f87066a..fda27f2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp @@ -57,8 +57,8 @@ KernelsData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetKernelsData(con return GetCommonKernelsData(params, options); } -JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetJitConstants(const convolution_params& params, const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); +JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + auto mem_consts = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); @@ -77,26 +77,18 @@ JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetJitConstants(c } // GetJitConstants ConvolutionKernelBase::DispatchData 
Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::SetDefault(const convolution_params& params, int) const { - DispatchData kd; + DispatchData dispatchData; const auto& output = params.output; - std::vector global = {output.X().v, output.Y().v, output.Feature().v / 16 * output.Batch().v}; - std::vector local = {1, 1, SIMD_SIZE}; + dispatchData.gws = { output.X().v, output.Y().v, output.Feature().v / 16 * output.Batch().v }; + dispatchData.lws = { 1, 1, SIMD_SIZE }; - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.cldnnStyle = {0, 0, 0, 0, 0}; + dispatchData.gemmStyle = {0, 0, 0, 0, 0, 0}; - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.efficiency = FORCE_PRIORITY_2; - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; - - kd.efficiency = FORCE_PRIORITY_2; - - return kd; + return dispatchData; } // SetDefault bool Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::Validate(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.h index eaf7d07..677ccfe 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.h @@ -32,7 +32,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const 
convolution_params& params, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return true; } WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp index 01eefa0..cb10986 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp @@ -96,46 +96,46 @@ ConvolutionKernel_mmad_b_fs_yx_fsv32::AutoTuneOption ConvolutionKernel_mmad_b_fs ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_b_fs_yx_fsv32::SetDefault(const convolution_params& cp, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex); - runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth; - runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight; - runInfo.cldnnStyle.prefetch = tuneOptions.prefetch; + dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth; + dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight; + dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch; - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; size_t ow_group = 8; while (ow_group > 1) { - if (CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth) % ow_group == 0) + if (CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth) % ow_group == 0) break; ow_group--; } - runInfo.gws0 = Align(cp.output.Feature().v, 32) / 4; - runInfo.gws1 = Align(CeilDiv(cp.output.X().v, 
runInfo.cldnnStyle.blockWidth), ow_group) * cp.output.Y().v * cp.output.Z().v; - runInfo.gws2 = cp.output.Batch().v; + dispatchData.gws[0] = Align(cp.output.Feature().v, 32) / 4; + dispatchData.gws[1] = Align(CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth), ow_group) * cp.output.Y().v * cp.output.Z().v; + dispatchData.gws[2] = cp.output.Batch().v; - runInfo.lws0 = 8; - runInfo.lws1 = ow_group; - runInfo.lws2 = 1; + dispatchData.lws[0] = 8; + dispatchData.lws[1] = ow_group; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_mmad_b_fs_yx_fsv32::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("OW_GROUP", runInfo.lws1)); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws0)); + jit.AddConstant(MakeJitConstant("OW_GROUP", dispatchData.lws[1])); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0])); jit.AddConstant(MakeJitConstant("OSV_SIZE", 32)); jit.AddConstant(MakeJitConstant("ISV_SIZE", 32)); - jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", runInfo.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", dispatchData.cldnnStyle.blockWidth)); jit.AddConstant(MakeJitConstant("IFM_BLOCKS", CeilDiv(params.inputs[0].Feature().v, 32))); auto input = params.inputs[0]; auto output = params.output; - auto blockWidth = runInfo.cldnnStyle.blockWidth; + auto blockWidth = dispatchData.cldnnStyle.blockWidth; size_t input_line_size = params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1)*params.dilation.x + 1; jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", blockWidth)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.h index 933f6c7..d88972b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.h @@ -33,7 +33,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; bool NeedPaddedInput() const override { return false; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp index 35926d8..862800a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp @@ -75,28 +75,20 @@ bool ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::Validate(const Params& p, const op ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::SetDefault(const convolution_params& cp, int /*autoTuneIndex*/) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; - std::vector global = {cp.output.Feature().v, cp.output.X().v * 
cp.output.Y().v, cp.output.Batch().v}; + dispatchData.gws = { cp.output.Feature().v, cp.output.X().v * cp.output.Y().v, cp.output.Batch().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, cp.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - auto local = GetOptimalLocalWorkGroupSizes(global, cp.engineInfo); - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } // TODO: optimize this kernel JitConstants ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.h index 1e1efb3..26b3e45 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.h @@ -33,7 +33,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { return 
WeightsLayout::goiyx; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp index cf352d5..551c2aa 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp @@ -84,7 +84,7 @@ bool ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::Validate(const Params &p, con } ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::AutoTuneOption ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetAutoTuneOptions(const Params &p, - int autoTuneIndex) const { + int autoTuneIndex) const { if ((autoTuneIndex >= 0) && (autoTuneIndex < static_cast(autoTuneOptions.size()))) { return autoTuneOptions[autoTuneIndex]; } @@ -150,50 +150,50 @@ static size_t get_lws(const convolution_params &cp, size_t blocks_count, size_t ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::SetDefault(const convolution_params &cp, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex); - runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth; - runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight; - runInfo.cldnnStyle.prefetch = tuneOptions.prefetch; + dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth; + dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight; + dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch; - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; const size_t max_lws = std::max((size_t)1, cp.engineInfo.maxWorkGroupSize 
/ sub_group_size); - runInfo.gws0 = Align(cp.output.Feature().v, 32) / 2; - runInfo.gws1 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth); - runInfo.gws2 = cp.output.Batch().v * cp.output.Y().v * cp.output.Z().v; + dispatchData.gws[0] = Align(cp.output.Feature().v, 32) / 2; + dispatchData.gws[1] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth); + dispatchData.gws[2] = cp.output.Batch().v * cp.output.Y().v * cp.output.Z().v; - runInfo.lws0 = sub_group_size; - runInfo.lws1 = get_lws(cp, runInfo.gws1, tuneOptions.blockWidth, max_lws); - runInfo.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = get_lws(cp, dispatchData.gws[1], tuneOptions.blockWidth, max_lws); + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetJitConstants(const convolution_params ¶ms, - const DispatchData &runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData &dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws0)); - jit.AddConstant(MakeJitConstant("LWS0", runInfo.lws0)); - jit.AddConstant(MakeJitConstant("LWS1", runInfo.lws1)); - jit.AddConstant(MakeJitConstant("LWS2", runInfo.lws2)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0])); + jit.AddConstant(MakeJitConstant("LWS0", dispatchData.lws[0])); + jit.AddConstant(MakeJitConstant("LWS1", dispatchData.lws[1])); + jit.AddConstant(MakeJitConstant("LWS2", dispatchData.lws[2])); jit.AddConstant(MakeJitConstant("OSV", 32)); - jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", runInfo.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", dispatchData.cldnnStyle.blockWidth)); auto input = params.inputs[0]; auto output = params.output; - auto blockWidth = runInfo.cldnnStyle.blockWidth; - size_t slm_line_size = params.stride.x * (runInfo.lws1 * blockWidth - 1) 
+ (params.weights.X().v - 1) * params.dilation.x + 1; - size_t slm_chunk_size = slm_line_size / runInfo.lws1; - size_t slm_tail = slm_line_size % runInfo.lws1; - size_t slm_line_aligned = slm_chunk_size*runInfo.lws1 + Align(slm_tail, sub_group_size); + auto blockWidth = dispatchData.cldnnStyle.blockWidth; + size_t slm_line_size = params.stride.x * (dispatchData.lws[1] * blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1; + size_t slm_chunk_size = slm_line_size / dispatchData.lws[1]; + size_t slm_tail = slm_line_size % dispatchData.lws[1]; + size_t slm_line_aligned = slm_chunk_size*dispatchData.lws[1] + Align(slm_tail, sub_group_size); size_t input_line_size = params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1; jit.AddConstant(MakeJitConstant("INPUT_LINE_SIZE", input_line_size)); jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", blockWidth)); - jit.AddConstant(MakeJitConstant("GROUP_SIZE", blockWidth * runInfo.lws1)); + jit.AddConstant(MakeJitConstant("GROUP_SIZE", blockWidth * dispatchData.lws[1])); jit.AddConstant(MakeJitConstant("SLM_LINE_SIZE", slm_line_aligned)); jit.AddConstant(MakeJitConstant("SLM_CHUNK_SIZE", slm_chunk_size)); jit.AddConstant(MakeJitConstant("SLM_TAIL", slm_tail)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.h index 7ed3da4..8bf8428 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.h @@ -33,7 +33,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const 
convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &p) const override { if (p.output.GetDType() == Datatype::F16 || p.output.GetDType() == Datatype::F32 || diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp index f9110ac..456d9b1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp @@ -87,39 +87,39 @@ ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::AutoTuneOption ConvolutionKernel_mm } ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::SetDefault(const convolution_params &cp, - int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp); + int autoTuneIndex) const { + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(cp); auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex); - runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth; - runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight; - runInfo.cldnnStyle.prefetch = tuneOptions.prefetch; + dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth; + dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight; + dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch; - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; - runInfo.gws0 = Align(cp.output.Feature().v, 32) 
/ 2; - runInfo.gws1 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth) * cp.output.Y().v; - runInfo.gws2 = cp.output.Batch().v; + dispatchData.gws[0] = Align(cp.output.Feature().v, 32) / 2; + dispatchData.gws[1] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth) * cp.output.Y().v; + dispatchData.gws[2] = cp.output.Batch().v; - runInfo.lws0 = 16; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetJitConstants(const convolution_params ¶ms, - const DispatchData &runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); + const DispatchData &dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws0)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0])); jit.AddConstant(MakeJitConstant("OSV", 32)); jit.AddConstant(MakeJitConstant("ISV", 32)); - jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", runInfo.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", dispatchData.cldnnStyle.blockWidth)); jit.AddConstant(MakeJitConstant("IFM_BLOCKS", CeilDiv(params.inputs[0].Feature().v, 32))); auto input = params.inputs[0]; auto output = params.output; - auto blockWidth = runInfo.cldnnStyle.blockWidth; + auto blockWidth = dispatchData.cldnnStyle.blockWidth; size_t input_line_size = std::min(params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1, input.X().v + input.X().pad.Total()); @@ -149,7 +149,7 @@ KernelsData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetKernelsData(const Pa } KernelsData ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetKernelsDataForAutoTune(const Params ¶ms, - const optional_params &options) const { + const optional_params &options) const { if (!Validate(params, options)) { return {}; } diff 
--git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.h index 93c7a18..aa8e593 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.h @@ -33,7 +33,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { return WeightsLayout::os_is_yx_osv32_isv4_swizzled_by_2; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.cpp index 764f753..da4b5cd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -67,8 +67,8 @@ KernelsData ConvolutionKernel_Ref::GetKernelsData(const Params& params, const op return GetTunedKernelsDataByIndex(params, options); } -JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& params, const DispatchData& kd) const { - JitConstants jit = ConvolutionKernelBase::GetJitConstants(params, kd); +JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { + JitConstants jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData); Datatype accumulator_dt; Datatype activation_dt; @@ -100,7 +100,7 @@ JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& pa ConvolutionKernelBase::DispatchData ConvolutionKernel_Ref::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params, autoTuneIndex); // FIXME: ConvolutionKernelBase::SetDefault should probably be pure and // not setting these at all as it's something specific to a concrete @@ -111,18 +111,9 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_Ref::SetDefault(const conv // Just set the correct value for a particular implementation here, // until the whole hierarchy is re-written. 
const auto& out = params.output; - std::vector global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; - - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - return kd; + dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + return dispatchData; } bool ConvolutionKernel_Ref::Validate(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.h index 5e27f68..eeb3a9c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.h @@ -43,7 +43,7 @@ protected: FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; bool Validate(const Params& params, const optional_params& options) const override; }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp index ac920de..f7f5637 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp @@ -44,8 +44,8 @@ ParamsKey ConvolutionKernel_Winograd_2x3_s1::GetSupportedKey() const { } JitConstants ConvolutionKernel_Winograd_2x3_s1::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const size_t input_tile_width = winograd_input_tile_width; const size_t input_tile_height = winograd_input_tile_height; @@ -70,10 +70,9 @@ JitConstants ConvolutionKernel_Winograd_2x3_s1::GetJitConstants(const convolutio return jit; } -ConvolutionKernel_Winograd_2x3_s1::Parent::DispatchData ConvolutionKernel_Winograd_2x3_s1::SetDefault( - const convolution_params& arg, - int) const { - Parent::DispatchData runInfo = Parent::SetDefault(arg); +ConvolutionKernel_Winograd_2x3_s1::Parent::DispatchData ConvolutionKernel_Winograd_2x3_s1::SetDefault(const convolution_params& arg, + int) const { + Parent::DispatchData dispatchData = Parent::SetDefault(arg); const size_t tile_n = winograd_tile_n; // goes in-depth const size_t tile_m = winograd_tile_m; // goes over flattened x and y @@ -86,17 +85,17 @@ ConvolutionKernel_Winograd_2x3_s1::Parent::DispatchData ConvolutionKernel_Winogr // width by tile's width to get tiles count const size_t nr_tiles_y = Align(arg.output.Y().v, 8) / input_tile_height; - runInfo.gws0 = arg.output.Feature().v / tile_n; - runInfo.gws1 = nr_tiles_x * nr_tiles_y / tile_m; - runInfo.gws2 = input_tile_width * input_tile_height * arg.inputs[0].Batch().v; + dispatchData.gws[0] = arg.output.Feature().v / tile_n; + dispatchData.gws[1] = nr_tiles_x * nr_tiles_y / tile_m; + dispatchData.gws[2] = input_tile_width * 
input_tile_height * arg.inputs[0].Batch().v; - runInfo.lws0 = 8; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 8; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - runInfo.efficiency = FORCE_PRIORITY_4; + dispatchData.efficiency = FORCE_PRIORITY_4; - return runInfo; + return dispatchData; } bool ConvolutionKernel_Winograd_2x3_s1::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h index e9f032a..66d7b50 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h @@ -34,8 +34,8 @@ protected: return WeightsLayout::winograd_2x3_s1_weights; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp index 43db00b..b24d05b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp @@ -38,8 +38,8 @@ ParamsKey ConvolutionKernel_Winograd_2x3_s1_fused::GetSupportedKey() const { } JitConstants ConvolutionKernel_Winograd_2x3_s1_fused::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const auto idepth = params.inputs[0].Feature().v; const auto input_pad_y = params.inputs[0].Y().pad.before + params.inputs[0].Y().pad.after; @@ -83,7 +83,7 @@ JitConstants ConvolutionKernel_Winograd_2x3_s1_fused::GetJitConstants(const conv ConvolutionKernel_Winograd_2x3_s1_fused::Parent::DispatchData ConvolutionKernel_Winograd_2x3_s1_fused::SetDefault( const convolution_params& arg, int) const { - Parent::DispatchData runInfo = Parent::SetDefault(arg); + Parent::DispatchData dispatchData = Parent::SetDefault(arg); const auto odepth = arg.output.Feature().v; const auto input_pad_y = arg.inputs[0].Y().pad.before + arg.inputs[0].Y().pad.after; @@ -100,21 +100,21 @@ ConvolutionKernel_Winograd_2x3_s1_fused::Parent::DispatchData ConvolutionKernel_ auto K = odepth; auto N = 1; - uint32_t global_step[3] = {14, 4, 16 * 8}; - uint32_t local_size[3] = {8, 2, 8}; + size_t global_step[3] = {14, 4, 16 * 8}; + size_t local_size[3] = {8, 2, 8}; - uint32_t zStep = local_size[2]; - runInfo.gws0 = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0]; - runInfo.gws1 = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1]; - runInfo.gws2 = ((uint32_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * zStep; + size_t zStep = local_size[2]; + dispatchData.gws[0] = ((size_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0]; + dispatchData.gws[1] = ((size_t)((P + global_step[1] - 1)) / global_step[1]) * 
local_size[1]; + dispatchData.gws[2] = ((size_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * zStep; - runInfo.lws0 = local_size[0]; - runInfo.lws1 = local_size[1]; - runInfo.lws2 = local_size[2]; + dispatchData.lws[0] = local_size[0]; + dispatchData.lws[1] = local_size[1]; + dispatchData.lws[2] = local_size[2]; - runInfo.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return runInfo; + return dispatchData; } bool ConvolutionKernel_Winograd_2x3_s1_fused::Validate(const Params& p, const optional_params& o) const { @@ -141,4 +141,4 @@ KernelsData ConvolutionKernel_Winograd_2x3_s1_fused::GetKernelsData(const Params const optional_params& options) const { return GetTunedKernelsDataByIndex(params, options); } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h index 269bd19..6cfdbde 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h @@ -34,8 +34,8 @@ protected: return WeightsLayout::winograd_2x3_s1_fused_weights; } - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp index 61be9ab..42fc634 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp @@ -39,8 +39,8 @@ ParamsKey ConvolutionKernel_Winograd_6x3_s1_fused::GetSupportedKey() const { } JitConstants ConvolutionKernel_Winograd_6x3_s1_fused::GetJitConstants(const convolution_params& params, - const DispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const auto idepth = params.inputs[0].Feature().v; const auto input_pad_y = params.inputs[0].Y().pad.before + params.inputs[0].Y().pad.after; @@ -95,7 +95,7 @@ WeightsLayout ConvolutionKernel_Winograd_6x3_s1_fused::GetPreferredWeightsLayout ConvolutionKernel_Winograd_6x3_s1_fused::Parent::DispatchData ConvolutionKernel_Winograd_6x3_s1_fused::SetDefault( const convolution_params& arg, int) const { - Parent::DispatchData runInfo = Parent::SetDefault(arg); + Parent::DispatchData dispatchData = Parent::SetDefault(arg); const auto odepth = arg.output.Feature().v; const auto input_pad_y = arg.inputs[0].Y().pad.before + arg.inputs[0].Y().pad.after; @@ -115,17 +115,17 @@ ConvolutionKernel_Winograd_6x3_s1_fused::Parent::DispatchData ConvolutionKernel_ uint32_t global_step[3] = {14, 6, 16 * 8}; uint32_t local_size[3] = {16, 1, 8}; - runInfo.gws0 = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0]; - runInfo.gws1 = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1]; - runInfo.gws2 = 
((uint32_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * local_size[2]; + dispatchData.gws[0] = ((uint32_t)((Q + global_step[0] - 1)) / global_step[0]) * local_size[0]; + dispatchData.gws[1] = ((uint32_t)((P + global_step[1] - 1)) / global_step[1]) * local_size[1]; + dispatchData.gws[2] = ((uint32_t)((N * K * 8 + global_step[2] - 1)) / global_step[2]) * local_size[2]; - runInfo.lws0 = local_size[0]; - runInfo.lws1 = local_size[1]; - runInfo.lws2 = local_size[2]; + dispatchData.lws[0] = local_size[0]; + dispatchData.lws[1] = local_size[1]; + dispatchData.lws[2] = local_size[2]; - runInfo.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return runInfo; + return dispatchData; } bool ConvolutionKernel_Winograd_6x3_s1_fused::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h index 6c4a522..c5c657c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h @@ -30,9 +30,9 @@ public: ParamsKey GetSupportedKey() const override; protected: - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override; }; -} // namespace kernel_selector \ No newline at end of 
file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp index d67b61f..541ca75 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp @@ -77,7 +77,7 @@ size_t GetOfmPerWorkitem(Datatype dataType) { ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b16::SetDefault(const convolution_params& arg, int) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg); const auto filter_ofm_num = arg.weights.OFM().v * arg.weights.G().v; const auto batch_size = arg.output.Batch().v; @@ -87,15 +87,15 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b16::SetDefault( const size_t ofmPerWorkItem = GetOfmPerWorkitem(arg.inputs[0].GetDType()); if (arg.inputs[0].GetDType() == Datatype::F16) { - runInfo.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; } else { - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; } - runInfo.lws0 = min_lws; - runInfo.gws0 = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem); + dispatchData.lws[0] = min_lws; + dispatchData.gws[0] = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem); - return runInfo; + return dispatchData; } bool ConvolutionKernel_yxfb_yxio_b16::Validate(const Params& p, const optional_params& o) const { @@ -140,10 +140,10 @@ bool ConvolutionKernel_yxfb_yxio_b16::Validate(const Params& p, const optional_p } JitConstants ConvolutionKernel_yxfb_yxio_b16::GetJitConstants(const convolution_params& params, - 
const DispatchData& kd) const { - auto jit = Parent::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - const auto local_work_group_size = kd.lws0; + const auto local_work_group_size = dispatchData.lws[0]; const auto batch_size = params.output.Batch().v; if (params.inputs[0].GetDType() == Datatype::F32) { @@ -168,7 +168,7 @@ JitConstants ConvolutionKernel_yxfb_yxio_b16::GetJitConstants(const convolution_ const size_t ofmPerWorkItem = GetOfmPerWorkitem(params.inputs[0].GetDType()); jit.AddConstants({ - MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0), + MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]), MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem), MakeJitConstant("BATCHES_PER_WORK_ITEM", batchesPerWorkItem), // how many batches will a single work item compute diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h index c57db65..e1c3aa1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h @@ -36,7 +36,7 @@ protected: } std::string GetKernelName(const convolution_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; } // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp index 5d4b1e1..39f42b0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -38,16 +38,16 @@ ParamsKey ConvolutionKernel_yxfb_yxio_b1_block::GetSupportedKey() const { ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b1_block::SetDefault(const convolution_params& arg, int) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg); // TODO: fill the proper data here (I don't know where can I locate it). 
- return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_yxfb_yxio_b1_block::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData); - cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0)); + cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0])); return cldnn_jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h index e274092..e7b1aa9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h @@ -29,10 +29,10 @@ public: ParamsKey GetSupportedKey() const override; protected: - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { return WeightsLayout::yxio; } DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp 
index c2e7b82..c690b49 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -51,12 +51,12 @@ size_t GetOfmPerWorkitem(size_t filter_ofm_num, size_t localWorkSize) { ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x::SetDefault( const convolution_params& arg, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex); const auto filter_ofm_num = arg.weights.OFM().v; const auto batch_size = arg.output.Batch().v; - runInfo.lws0 = local_work_size; + dispatchData.lws[0] = local_work_size; // We cannot return 8 because we are processing 4 spatial coordinates for batch1, // and if we use more than 4 ofm_per_work_item we downgrade simd16 to simd8 which would break this algorithm. 
@@ -65,28 +65,28 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b1_block_mulitpl // TODO: experiment with SIMD8 version of algorithm and check if it could be faster /*if (output_feature_count % (lws * 8) == 0) { - run_info.ofm_per_work_item = 8; - run_info.gws1 = static_cast(std::ceil(static_cast(run_info.gws1) / 2.0f)); + dispatchData.ofm_per_work_item = 8; + dispatchData.gws[1] = static_cast(std::ceil(static_cast(dispatchData.gws[1]) / 2.0f)); } else*/ const size_t ofmPerWorkItem = GetOfmPerWorkitem(filter_ofm_num, local_work_size); if (ofmPerWorkItem == 4) { // We compute multiple spatial coordinates "x" in a single workitem that's why we must divide - runInfo.gws1 = static_cast(std::ceil(static_cast(runInfo.gws1) / 4.0f)); + dispatchData.gws[1] = static_cast(std::ceil(static_cast(dispatchData.gws[1]) / 4.0f)); } else if (ofmPerWorkItem == 2) { - runInfo.gws1 = static_cast(std::ceil(static_cast(runInfo.gws1) / 8.0f)); + dispatchData.gws[1] = static_cast(std::ceil(static_cast(dispatchData.gws[1]) / 8.0f)); } else { - runInfo.gws1 = static_cast(std::ceil(static_cast(runInfo.gws1) / 8.0f)); + dispatchData.gws[1] = static_cast(std::ceil(static_cast(dispatchData.gws[1]) / 8.0f)); } - runInfo.gws0 = filter_ofm_num * batch_size / ofmPerWorkItem; + dispatchData.gws[0] = filter_ofm_num * batch_size / ofmPerWorkItem; - return runInfo; + return dispatchData; } JitConstants ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto cldnn_jit = ConvolutionKernelBase::GetJitConstants(params, dispatchData); size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, local_work_size); cldnn_jit.AddConstant(MakeJitConstant("USE_VECTOR", ofmPerWorkItem)); @@ -101,7 +101,7 @@ JitConstants ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x::GetJitConstants(co 
cldnn_jit.AddConstant(MakeJitConstant( "OFM_PER_WORK_ITEM", ofmPerWorkItem)); // how many output feature maps for a single batch will a single work item produce - cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0)); + cldnn_jit.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0])); return cldnn_jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h index 6d91c0c..e1ff764 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h @@ -34,7 +34,7 @@ protected: return WeightsLayout::yxio; } bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp index f873379..1d573c4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp @@ -48,22 +48,22 @@ size_t GetOfmPerWorkitem(size_t filterOfmNum, size_t batchSize, size_t local_wor ConvolutionKernelBase::DispatchData ConvolutionKernel_yxfb_yxio_b8::SetDefault(const convolution_params& arg, int autoTuneIndex) const { - DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(arg, autoTuneIndex); const auto filterOfmNum = arg.weights.OFM().v; const auto batchSize = arg.output.Batch().v; - runInfo.lws0 = batchSize == 8 ? 8 : 16; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = batchSize == 8 ? 8 : 16; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - size_t ofmPerWorkItem = GetOfmPerWorkitem(filterOfmNum, batchSize, runInfo.lws0); + size_t ofmPerWorkItem = GetOfmPerWorkitem(filterOfmNum, batchSize, dispatchData.lws[0]); - runInfo.gws0 = filterOfmNum * batchSize / ofmPerWorkItem; + dispatchData.gws[0] = filterOfmNum * batchSize / ofmPerWorkItem; - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; - return runInfo; + return dispatchData; } bool ConvolutionKernel_yxfb_yxio_b8::Validate(const Params& p, const optional_params& o) const { @@ -99,13 +99,13 @@ bool ConvolutionKernel_yxfb_yxio_b8::Validate(const Params& p, const optional_pa } JitConstants ConvolutionKernel_yxfb_yxio_b8::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - JitConstants jits = ConvolutionKernelBase::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + JitConstants jits = ConvolutionKernelBase::GetJitConstants(params, dispatchData); - size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, params.output.Batch().v, kd.lws0); + size_t ofmPerWorkItem = GetOfmPerWorkitem(params.weights.OFM().v, params.output.Batch().v, dispatchData.lws[0]); 
jits.AddConstant(MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem)); - jits.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0)); + jits.AddConstant(MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0])); return jits; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h index c4f8f3b..669ec8f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h @@ -29,11 +29,11 @@ public: ParamsKey GetSupportedKey() const override; protected: - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override { return WeightsLayout::yxio; } bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp index a5687d6..106bac5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp @@ -48,7 +48,7 @@ ParamsKey DeformableConvolutionKernel_bfyx_conv::GetSupportedKey() const { DeformableConvolutionKernel_bfyx_conv::DispatchData DeformableConvolutionKernel_bfyx_conv::SetDefault(const convolution_params& params, int autoTuneIndex) const { - DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex); + DispatchData dispatchData = ConvolutionKernelBase::SetDefault(params, autoTuneIndex); const auto& out = params.output; @@ -57,21 +57,21 @@ DeformableConvolutionKernel_bfyx_conv::DispatchData DeformableConvolutionKernel_ auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = CeilDiv(x * y, 16); - kd.gws1 = Align(f, 16); - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x * y, 16); + dispatchData.gws[1] = Align(f, 16); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = 16; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 16; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } JitConstants DeformableConvolutionKernel_bfyx_conv::GetJitConstants(const convolution_params& params, - const DispatchData& /*kd*/) const { + const DispatchData& /*dispatchData*/) const { JitConstants jit = WeightBiasKernelBase::GetJitConstants(params); jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", 16)); jit.AddConstant(MakeJitConstant("INPUT_CHANNELS", params.inputs[0].Feature().v / params.weights.X().v / params.weights.Y().v)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.h index eb0eb06..330874f 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.h @@ -30,7 +30,7 @@ public: protected: DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const override; ParamsKey GetSupportedKey() const override; WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override { return WeightsLayout::os_is_yx_isv16_osv16; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp index 26fc779..ff2c3f4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp @@ -45,7 +45,7 @@ ParamsKey DeformableConvolutionKernel_bfyx_interp::GetSupportedKey() const { } CommonDispatchData DeformableConvolutionKernel_bfyx_interp::SetDefault(const convolution_params& params) const { - CommonDispatchData kd; + CommonDispatchData dispatchData; const auto& out = params.output; @@ -54,17 +54,17 @@ CommonDispatchData DeformableConvolutionKernel_bfyx_interp::SetDefault(const con auto b = out.Batch().v; auto kernel_size = params.kernelSize.x * params.kernelSize.y; - kd.gws0 = Align(x * y, 16); - kd.gws1 = params.deformable_groups * b; - kd.gws2 = kernel_size; + dispatchData.gws[0] = Align(x * y, 16); + 
dispatchData.gws[1] = params.deformable_groups * b; + dispatchData.gws[2] = kernel_size; - kd.lws0 = 16; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } @@ -91,14 +91,14 @@ KernelsData DeformableConvolutionKernel_bfyx_interp::GetKernelsData(const Params KernelData kd = KernelData::Default(params); convolution_params& newParams = *static_cast(kd.params.get()); - CommonDispatchData runInfo = SetDefault(newParams); + CommonDispatchData dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, static_cast(newParams.inputs.size())); return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp index 8f5384a..2688edb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp @@ -33,26 +33,17 @@ JitConstants CTCGreedyDecoderKernelBase::GetJitConstants(const ctc_greedy_decode } CTCGreedyDecoderKernelBase::DispatchData CTCGreedyDecoderKernelBase::SetDefault(const ctc_greedy_decoder_params& params) const { - DispatchData kd; - kd.fp16UnitUsed = params.inputs[0].GetDType() == 
Datatype::F16; + DispatchData dispatchData; - std::vector global = { 1, 1, 1 }; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { 1, 1, 1 }; + dispatchData.lws= GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData CTCGreedyDecoderKernelBase::GetCommonKernelsData(const Params& params, - const optional_params& options, - float estimated_time) const { + const optional_params& options, + float estimated_time) const { assert(params.GetType() == KernelType::CTC_GREEDY_DECODER); if (!Validate(params, options)) @@ -60,19 +51,17 @@ KernelsData CTCGreedyDecoderKernelBase::GetCommonKernelsData(const Params& param const ctc_greedy_decoder_params& orgParams = static_cast(params); - DispatchData runInfo; - - runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); - auto cldnn_jit = GetJitConstants(orgParams, runInfo); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.h index 9c14de1..0181bce 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.h @@ 
-44,7 +44,7 @@ public: using DispatchData = CommonDispatchData; protected: - virtual JitConstants GetJitConstants(const ctc_greedy_decoder_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const ctc_greedy_decoder_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const ctc_greedy_decoder_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp index 43a3cec..3941259 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp @@ -83,22 +83,13 @@ JitConstants CumSumKernelBase::GetJitConstants(const cum_sum_params& params, Dis } CumSumKernelBase::DispatchData CumSumKernelBase::SetDefault(const cum_sum_params& params) const { - DispatchData runInfo; - std::vector global = {params.output.Batch().v, - params.output.Feature().v * params.output.W().v, - params.output.Z().v * params.output.Y().v * params.output.X().v}; + DispatchData dispatchData; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v * params.output.W().v, + params.output.Z().v * params.output.Y().v * params.output.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params, @@ -111,14 
+102,14 @@ KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params, return {}; } - auto runInfo = SetDefault(newParams); + auto dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = GetJitConstants(newParams, runInfo); + auto cldnn_jit = GetJitConstants(newParams, dispatchData); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.h index 920551f..4ec34bb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.h @@ -56,7 +56,7 @@ protected: int32_t GetCumSumAxisIndex(const cum_sum_params& params) const; size_t GetRealAxisIndex(const cum_sum_params& params) const; ParamsKey GetSupportedKey() const override; - virtual JitConstants GetJitConstants(const cum_sum_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const cum_sum_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const; bool Validate(const Params&, const optional_params&) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp index 89d91b4..2a2a2a2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp @@ -25,15 +25,15 @@ namespace kernel_selector { static constexpr size_t simd = 16; static constexpr size_t BLOCK_SIZE = 16; -JitConstants CumSumKernelPartialSum::GetJitConstants(const cum_sum_params& params, DispatchData kd) const { - auto jits = CumSumKernelBase::GetJitConstants(params, kd); +JitConstants CumSumKernelPartialSum::GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const { + auto jits = CumSumKernelBase::GetJitConstants(params, dispatchData); auto activation_dt = GetActivationType(params); jits.Merge(MakeTypeJitConstants(activation_dt, "PARTIAL")); jits.AddConstant(MakeJitConstant("SIMD", simd)); - jits.AddConstant(MakeJitConstant("LWS", kd.lws0)); + jits.AddConstant(MakeJitConstant("LWS", dispatchData.lws[0])); jits.AddConstant(MakeJitConstant("BLOCK_SIZE", BLOCK_SIZE)); - jits.AddConstant(MakeJitConstant("SUM_ITEMS_NUM", kd.sum_items_num)); + jits.AddConstant(MakeJitConstant("SUM_ITEMS_NUM", dispatchData.sum_items_num)); return jits; } @@ -48,15 +48,15 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param KernelData kd = KernelData::Default(params, kernels_num); const cum_sum_params& newParams = *static_cast(kd.params.get()); - auto runInfo = SetDefaultForMulti(newParams); + auto dispatchData = SetDefaultForMulti(newParams); { // partial sum - auto cldnn_jit = GetJitConstants(newParams, runInfo.stage_1); + auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_1); cldnn_jit.AddConstant(MakeJitConstant("CUM_SUM_PARTIAL_SUM", 1)); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = 
CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo.stage_1, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData.stage_1, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.clear(); // Clear original output argument kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); @@ -65,12 +65,12 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param { // Final auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = GetJitConstants(newParams, runInfo.stage_final); + auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_final); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[1]; - FillCLKernelData(kernel, runInfo.stage_final, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData.stage_final, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.clear(); // Clear original output argument kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); @@ -83,7 +83,7 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param } CumSumKernelPartialSum::MultiDispatchData CumSumKernelPartialSum::SetDefaultForMulti(const cum_sum_params& params) const { - MultiDispatchData md; + MultiDispatchData dispatchData; std::vector dims = {params.output.Batch().v, params.output.Feature().v, params.output.W().v, @@ -108,23 +108,19 @@ CumSumKernelPartialSum::MultiDispatchData CumSumKernelPartialSum::SetDefaultForM } } - md.stage_1.gws0 = Align(gws[0], BLOCK_SIZE); - md.stage_1.gws1 = gws[1]; - md.stage_1.gws2 = gws[2]; - md.stage_1.lws0 = BLOCK_SIZE; - md.stage_1.lws1 = 1; - md.stage_1.lws2 = 1; - md.stage_1.sum_items_num = items_num; - - md.stage_final.gws0 = gws[0]; - md.stage_final.gws1 = 
gws[1]; - md.stage_final.gws2 = gws[2]; - md.stage_final.lws0 = 1; - md.stage_final.lws1 = 1; - md.stage_final.lws2 = 1; - md.stage_final.sum_items_num = Align(items_num, BLOCK_SIZE); - - return md; + dispatchData.stage_1.gws[0] = Align(gws[0], BLOCK_SIZE); + dispatchData.stage_1.gws[1] = gws[1]; + dispatchData.stage_1.gws[2] = gws[2]; + dispatchData.stage_1.lws[0] = BLOCK_SIZE; + dispatchData.stage_1.lws[1] = 1; + dispatchData.stage_1.lws[2] = 1; + dispatchData.stage_1.sum_items_num = items_num; + + dispatchData.stage_final.gws = gws; + dispatchData.stage_final.lws = { 1, 1, 1 }; + dispatchData.stage_final.sum_items_num = Align(items_num, BLOCK_SIZE); + + return dispatchData; } KernelsData CumSumKernelPartialSum::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h index d092e82..40330cd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h @@ -29,7 +29,7 @@ protected: DispatchData stage_final; }; - JitConstants GetJitConstants(const cum_sum_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const override; KernelsData GetMultiStageKernelsData(const Params& params, const optional_params&, float estimated_time) const; MultiDispatchData SetDefaultForMulti(const cum_sum_params& params) const; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.cpp index fe05d26..ee71985 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.cpp @@ -20,8 +20,8 @@ #include namespace kernel_selector { -JitConstants CumSumKernelRef::GetJitConstants(const cum_sum_params& params, DispatchData kd) const { - auto jits = CumSumKernelBase::GetJitConstants(params, kd); +JitConstants CumSumKernelRef::GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const { + auto jits = CumSumKernelBase::GetJitConstants(params, dispatchData); jits.AddConstant(MakeJitConstant("AXIS_LAYOUT_INDEX", GetCumSumAxisIndex(params))); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.h index 58099d7..4273653 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_ref.h @@ -24,7 +24,7 @@ public: CumSumKernelRef() : CumSumKernelBase("cum_sum_ref") {} virtual ~CumSumKernelRef() = default; protected: - JitConstants GetJitConstants(const cum_sum_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp index dbbc4a9..16f83ac 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16.cpp @@ -48,7 +48,7 @@ ParamsKey DeconvolutionKernel_b_fs_zyx_fsv16::GetSupportedKey() const { } DeconvolutionKernelBase::DispatchData DeconvolutionKernel_b_fs_zyx_fsv16::SetDefault(const deconvolution_params& params) const { - DispatchData kd = DeconvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = DeconvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -63,25 +63,26 @@ DeconvolutionKernelBase::DispatchData DeconvolutionKernel_b_fs_zyx_fsv16::SetDef if (ver_bsv16_fsv16) { if (params.depthwise_separable_opt) { - kd.gws0 = x * y * z; - kd.gws1 = f; - kd.gws2 = b / 16; + dispatchData.gws[0] = x * y * z; + dispatchData.gws[1] = f; + dispatchData.gws[2] = b / 16; - kd.lws0 = 1; - kd.lws1 = sub_group_size; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; } else { - kd.gws0 = 64; - while (kd.gws0 > 16) { - if (f % kd.gws0 == 0) break; - kd.gws0 /= 2; + dispatchData.gws[0] = 64; + while (dispatchData.gws[0] > 16) { + if (f % dispatchData.gws[0] == 0) + break; + dispatchData.gws[0] /= 2; } - kd.gws1 = x * y * z; - kd.gws2 = CeilDiv(b, 16) * (f / kd.gws0) * params.groups; + dispatchData.gws[1] = x * y * z; + dispatchData.gws[2] = CeilDiv(b, 16) * (f / dispatchData.gws[0]) * params.groups; - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } } else { size_t x_block_size = 16; @@ -92,31 +93,32 @@ DeconvolutionKernelBase::DispatchData 
DeconvolutionKernel_b_fs_zyx_fsv16::SetDef } x_block_size = std::max(x_block_size, (size_t)8); if (params.depthwise_separable_opt) { - kd.gws0 = CeilDiv(x, x_block_size) * y * z; - kd.gws1 = f; - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x, x_block_size) * y * z; + dispatchData.gws[1] = f; + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = sub_group_size; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; } else { - kd.gws0 = 64; - while (kd.gws0 > 16) { - if (f % kd.gws0 == 0) break; - kd.gws0 /= 2; + dispatchData.gws[0] = 64; + while (dispatchData.gws[0] > 16) { + if (f % dispatchData.gws[0] == 0) + break; + dispatchData.gws[0] /= 2; } - kd.gws1 = CeilDiv(x, x_block_size) * y * z; - kd.gws2 = b * (f / kd.gws0); + dispatchData.gws[1] = CeilDiv(x, x_block_size) * y * z; + dispatchData.gws[2] = b * (f / dispatchData.gws[0]); - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } } - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } bool DeconvolutionKernel_b_fs_zyx_fsv16::Validate(const Params& p, const optional_params& o) const { @@ -230,10 +232,10 @@ JitConstants DeconvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const deconvolu jit.AddConstant(MakeJitConstant("IW_FULL", params.output.X().LogicalDimPadded())); - DispatchData runInfo = SetDefault(params); - jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0)); - jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1)); - jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2)); + DispatchData dispatchData = SetDefault(params); + jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0])); + jit.AddConstant(MakeJitConstant("LWS_1", dispatchData.lws[1])); + jit.AddConstant(MakeJitConstant("LWS_2", dispatchData.lws[2])); if (!params.fused_ops.empty()) { auto fused_dt = 
GetActivationType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16_dw.cpp index cafa959..02a329c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16_dw.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_b_fs_zyx_fsv16_dw.cpp @@ -138,7 +138,7 @@ ParamsKey DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetSupportedKey() const { } DeconvolutionKernelBase::DispatchData DeconvolutionKernel_b_fs_zyx_fsv16_dw::SetDefault(const deconvolution_params& params) const { - DispatchData kd = DeconvolutionKernelBase::SetDefault(params); + DispatchData dispatchData = DeconvolutionKernelBase::SetDefault(params); const auto& out = params.output; @@ -148,17 +148,17 @@ DeconvolutionKernelBase::DispatchData DeconvolutionKernel_b_fs_zyx_fsv16_dw::Set auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = CeilDiv(x, GetDispatchParams(params).block_size_x) * y * z; - kd.gws1 = Align(f, feature_block_size); - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x, GetDispatchParams(params).block_size_x) * y * z; + dispatchData.gws[1] = Align(f, feature_block_size); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = sub_group_size; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } bool DeconvolutionKernel_b_fs_zyx_fsv16_dw::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp index 4a7d89e..1dc654c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp @@ -90,22 +90,24 @@ DeconvolutionKernelBase::DispatchData DeconvolutionKernelBase::SetDefault(const auto batch_size = params.output.Batch().v; auto output_features = params.output.Feature().v; - DispatchData kd; + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; size_t gws0 = output_features * batch_size; size_t lws0 = std::min(gws0, static_cast(32)); while (gws0 % lws0) { lws0--; } - kd.gws0 = gws0; - kd.gws1 = params.output.X().v; - kd.gws2 = params.output.Y().v * params.output.Z().v; - kd.lws0 = lws0; - kd.lws1 = 1; - kd.lws2 = 1; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - return kd; + + dispatchData.gws[0] = gws0; + dispatchData.gws[1] = params.output.X().v; + dispatchData.gws[2] = params.output.Y().v * params.output.Z().v; + + dispatchData.lws[0] = lws0; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; + + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + return dispatchData; } KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const optional_params& options) const { @@ -116,7 +118,7 @@ KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const } const deconvolution_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); deconvolution_params& newParams = *static_cast(kd.params.get()); @@ -137,7 +139,7 @@ KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - 
runInfo, + dispatchData, params.engineInfo, kernelName, jit, @@ -149,7 +151,7 @@ KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const GetFusedPrimitiveInputsCount(params)); kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp index 4084bdb..ac89b0b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.cpp @@ -40,19 +40,20 @@ ParamsKey DeconvolutionKernel_bfyx_opt::GetSupportedKey() const { } CommonDispatchData DeconvolutionKernel_bfyx_opt::SetDefault(const deconvolution_params& params) const { - DispatchData kd; + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; auto wg_size = 16; - kd.gws0 = Align(params.output.X().v, wg_size * params.stride.x); - kd.gws1 = params.output.Y().v; - kd.gws2 = params.output.Batch().v * params.output.Feature().v; - kd.lws0 = wg_size; - kd.lws1 = 1; - kd.lws2 = 1; - kd.efficiency = FORCE_PRIORITY_6; - return kd; + dispatchData.gws[0] = Align(params.output.X().v, wg_size * params.stride.x); + dispatchData.gws[1] = params.output.Y().v; + dispatchData.gws[2] = params.output.Batch().v * params.output.Feature().v; + + dispatchData.lws[0] = wg_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; + + dispatchData.efficiency = FORCE_PRIORITY_6; + return dispatchData; } JitConstants DeconvolutionKernel_bfyx_opt::GetJitConstants(const deconvolution_params& params) const { diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp index e7e49ab..7a97f47 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp @@ -108,36 +108,28 @@ WeightsLayout DeconvolutionKernel_imad_along_f_tile_bfx::GetPreferredWeightsLayo } DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_along_f_tile_bfx::SetDefault(const deconvolution_params& params) const { - auto dispatch = Parent::SetDefault(params); + DispatchData dispatchData = Parent::SetDefault(params); auto tile_x = GetTileX(params); auto tile_ofm = GetTileOFM(params); auto tile_b = GetTileB(params); - std::vector global = { + dispatchData.gws = { CeilDiv(params.output.X().v, tile_x) * params.output.Y().v * params.output.Z().v, Align(CeilDiv(params.output.Feature().v, tile_ofm), simd), CeilDiv(params.output.Batch().v, tile_b) }; - std::vector local = { 1, simd, 1 }; - - dispatch.gws0 = global[0]; - dispatch.gws1 = global[1]; - dispatch.gws2 = global[2]; - - dispatch.lws0 = local[0]; - dispatch.lws1 = local[1]; - dispatch.lws2 = local[2]; + dispatchData.lws = { 1, simd, 1 }; // Currently most optimized for fsv16 formats if (params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 || params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16) { - dispatch.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; } else { - dispatch.efficiency = FORCE_PRIORITY_8; + dispatchData.efficiency = FORCE_PRIORITY_8; } - return dispatch; + return dispatchData; } JitConstants DeconvolutionKernel_imad_along_f_tile_bfx::GetJitConstants(const 
deconvolution_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp index da9b46f..1eb8d7b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp @@ -60,27 +60,19 @@ WeightsLayout DeconvolutionKernel_imad_ref::GetPreferredWeightsLayout(const deco } DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_ref::SetDefault(const deconvolution_params& params) const { - auto dispatch = Parent::SetDefault(params); + DispatchData dispatchData = Parent::SetDefault(params); - std::vector global = { + dispatchData.gws = { params.output.Feature().v, params.output.X().v * params.output.Y().v * params.output.Z().v, params.output.Batch().v }; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - dispatch.gws0 = global[0]; - dispatch.gws1 = global[1]; - dispatch.gws2 = global[2]; + dispatchData.efficiency = FORCE_PRIORITY_9; - dispatch.lws0 = local[0]; - dispatch.lws1 = local[1]; - dispatch.lws2 = local[2]; - - dispatch.efficiency = FORCE_PRIORITY_9; - - return dispatch; + return dispatchData; } JitConstants DeconvolutionKernel_imad_ref::GetJitConstants(const deconvolution_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp index b3d4268..b8cb81e 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -62,20 +62,20 @@ ParamsKey DeconvolutionKernelRef::GetSupportedKey() const { } CommonDispatchData DeconvolutionKernelRef::SetDefault(const deconvolution_params& params) const { - CommonDispatchData runInfo = DeconvolutionKernelBase::SetDefault(params); + CommonDispatchData dispatchData = DeconvolutionKernelBase::SetDefault(params); if (params.output.Feature().v * params.output.Batch().v <= 16) { const auto& out = params.output; - runInfo.gws0 = Align(out.X().v, 32); - runInfo.gws1 = out.Y().v * out.Z().v; - runInfo.gws2 = out.Feature().v * out.Batch().v; + dispatchData.gws[0] = Align(out.X().v, 32); + dispatchData.gws[1] = out.Y().v * out.Z().v; + dispatchData.gws[2] = out.Feature().v * out.Batch().v; - runInfo.lws0 = 32; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } - return runInfo; + return dispatchData; } JitConstants DeconvolutionKernelRef::GetJitConstants(const deconvolution_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp index f120dcb..f13a64f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp 
@@ -40,23 +40,15 @@ bool DepthToSpaceKernelBase::Validate(const Params& p, const optional_params& o) } CommonDispatchData DepthToSpaceKernelBase::SetDefault(const depth_to_space_params& params) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = { params.output.Batch().v, - params.output.Feature().v, - params.output.Z().v * params.output.Y().v * params.output.X().v }; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Z().v * params.output.Y().v * params.output.X().v }; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const { @@ -80,14 +72,14 @@ KernelsData DepthToSpaceKernelBase::GetCommonKernelsData(const Params& params, c return {}; } - auto runInfo = SetDefault(newParams); + auto dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp index 
665e535..4a87031 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp @@ -45,23 +45,14 @@ bool DepthToSpaceKernelBlock2Opt::Validate(const Params& p, const optional_param } CommonDispatchData DepthToSpaceKernelBlock2Opt::SetDefault(const depth_to_space_params& params) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = { Align(params.inputs[0].X().v / 2, 16), - params.inputs[0].Y().v, - 1}; + dispatchData.gws = { Align(params.inputs[0].X().v / 2, 16), + params.inputs[0].Y().v, + 1 }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants DepthToSpaceKernelBlock2Opt::GetJitConstants(const depth_to_space_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp index a941bdd..418b124 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -48,17 +48,17 @@ JitConstants DetectionOutputKernelBase::GetJitConstants(const detection_output_p return jit; } -DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault( - const detection_output_params& params) const { - DispatchData kd; +DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault(const detection_output_params& /*params*/) const { + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - kd.gws0 = 0; - kd.gws1 = 0; - kd.gws2 = 0; - kd.lws0 = 0; - kd.lws1 = 0; - kd.lws2 = 0; - return kd; + dispatchData.gws[0] = 0; + dispatchData.gws[1] = 0; + dispatchData.gws[2] = 0; + + dispatchData.lws[0] = 0; + dispatchData.lws[1] = 0; + dispatchData.lws[2] = 0; + + return dispatchData; } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp index a9b6602..a68d458 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -35,7 +35,7 @@ ParamsKey DetectionOutputKernel::GetSupportedKey() const { } CommonDispatchData DetectionOutputKernel::SetDefault(const detection_output_params& params) const { - CommonDispatchData runInfo = DetectionOutputKernelBase::SetDefault(params); + CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params); // Number of all work items is set to total number of bounding boxes - // one bounding box is procerssed by one work item @@ -54,15 +54,15 @@ CommonDispatchData DetectionOutputKernel::SetDefault(const detection_output_para bboxesNum = work_group_size * params.inputs[0].Batch().v; - runInfo.gws0 = Align(bboxesNum, work_group_size); - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = Align(bboxesNum, work_group_size); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = work_group_size; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = work_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } KernelsData DetectionOutputKernel::GetKernelsData(const Params& params, const optional_params& options) const { @@ -70,14 +70,14 @@ KernelsData DetectionOutputKernel::GetKernelsData(const Params& params, const op KernelData kd = KernelData::Default(params); const detection_output_params& detectOutParams = static_cast(params); - DispatchData runInfo = SetDefault(detectOutParams); + DispatchData dispatchData = SetDefault(detectOutParams); auto cldnnJit = GetJitConstants(detectOutParams); auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint); kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 
2}); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp index 91b9945..03de4a7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ ParamsKey DetectionOutputKernel_sort::GetSupportedKey() const { } CommonDispatchData DetectionOutputKernel_sort::SetDefault(const detection_output_params& params) const { - CommonDispatchData runInfo = DetectionOutputKernelBase::SetDefault(params); + CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params); unsigned class_num = params.detectOutParams.num_classes; if (params.detectOutParams.share_location && params.detectOutParams.background_label_id == 0) { @@ -49,15 +49,15 @@ CommonDispatchData DetectionOutputKernel_sort::SetDefault(const detection_output work_group_size = (work_group_size + work_group_size % 2) / (work_group_size / 256 + 1); } - runInfo.gws0 = Align(bboxesNum, work_group_size); - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = Align(bboxesNum, work_group_size); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = work_group_size; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = work_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } KernelsData DetectionOutputKernel_sort::GetKernelsData(const Params& params, const optional_params& 
options) const { @@ -66,14 +66,14 @@ KernelsData DetectionOutputKernel_sort::GetKernelsData(const Params& params, con KernelData kd = KernelData::Default(params); const detection_output_params& detectOutParams = static_cast(params); - DispatchData runInfo = SetDefault(detectOutParams); + DispatchData dispatchData = SetDefault(detectOutParams); auto cldnnJit = GetJitConstants(detectOutParams); auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint); kd.estimatedTime = FORCE_PRIORITY_8; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp index e180e8a..7f12c6b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp @@ -200,23 +200,23 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_ } EltwiseKernelBase::DispatchData EltwiseKernel_b_fs_yx_fsv16::SetDefault(const eltwise_params& params) const { - DispatchData kd; + DispatchData dispatchData; - kd.gws0 = Align(params.output.Feature().v, 16); - kd.gws1 = CeilDiv(params.output.X().v, GetBlockSize(params)) * params.output.Y().v; - kd.gws2 = params.output.Batch().v; + dispatchData.gws[0] = Align(params.output.Feature().v, 16); + dispatchData.gws[1] = CeilDiv(params.output.X().v, GetBlockSize(params)) * params.output.Y().v; + dispatchData.gws[2] = params.output.Batch().v; - kd.lws0 = 16; - kd.lws1 = 16; - while (kd.lws1 > 1) { - if (kd.gws1 % kd.lws1 
== 0) + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 16; + while (dispatchData.lws[1] > 1) { + if (dispatchData.gws[1] % dispatchData.lws[1] == 0) break; - kd.lws1--; + dispatchData.lws[1]--; } - kd.lws2 = 1; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_1; - return kd; + dispatchData.efficiency = FORCE_PRIORITY_1; + return dispatchData; } KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, const optional_params& options) const { @@ -231,12 +231,12 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); - DispatchData runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto& kernel = kd.kernels[0]; - kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), @@ -244,7 +244,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co false, GetFusedPrimitiveInputsCount(params)); - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp index 8de307d..f8bc154 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp @@ -512,17 +512,14 @@ JitConstants 
EltwiseKernelBase::GetJitConstants(const eltwise_params& params) co } EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const { - DispatchData kd; + DispatchData dispatchData; if (params.layoutBased || params.int8_quantization || params.broadcast) { - auto global = GetTensorFriendlyWorkGroups(params.output); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.gws = GetTensorFriendlyWorkGroups(params.output); } else if (CheckInputsOutputNoPitchSameDims(params)) { - kd.gws0 = params.output.LogicalSize(); - kd.gws1 = 1; - kd.gws2 = 1; + dispatchData.gws[0] = params.output.LogicalSize(); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; } else { const auto& out = params.output; @@ -536,60 +533,58 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para gws.push_back(1U); } - kd.gws0 = gws[0]; + dispatchData.gws[0] = gws[0]; if (n_dims == 6) { - kd.gws1 = gws[1] * gws[2] * gws[3]; // y*z*w - kd.gws2 = gws[4] * gws[5]; + dispatchData.gws[1] = gws[1] * gws[2] * gws[3]; // y*z*w + dispatchData.gws[2] = gws[4] * gws[5]; } else if (n_dims == 5) { - kd.gws1 = gws[1] * gws[2]; // y*z - kd.gws2 = gws[3] * gws[4]; + dispatchData.gws[1] = gws[1] * gws[2]; // y*z + dispatchData.gws[2] = gws[3] * gws[4]; } else { - kd.gws1 = gws[1]; - kd.gws2 = gws[2] * gws[3]; + dispatchData.gws[1] = gws[1]; + dispatchData.gws[2] = gws[2] * gws[3]; } } - auto local = GetOptimalLocalWorkGroupSizes({kd.gws0, kd.gws1, kd.gws2}, params.engineInfo); + auto local = GetOptimalLocalWorkGroupSizes({dispatchData.gws[0], dispatchData.gws[1], dispatchData.gws[2]}, params.engineInfo); const size_t optimal_lws_values[] = {256, 224, 192, 160, 128, 96, 64, 32, 16}; if ((params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 || params.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 || params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16) && - params.output.Feature().v % 16 == 0 && kd.gws1 % 16 == 0) { 
- kd.lws0 = 1; + params.output.Feature().v % 16 == 0 && dispatchData.gws[1] % 16 == 0) { + dispatchData.lws[0] = 1; for (auto lws : optimal_lws_values) { - if (kd.gws1 % lws == 0) { - kd.lws1 = lws; + if (dispatchData.gws[1] % lws == 0) { + dispatchData.lws[1] = lws; break; } } - kd.lws2 = 1; + dispatchData.lws[2] = 1; } else if (params.output.GetLayout() == DataLayout::fs_b_yx_fsv32) { - kd.gws2 = Align(kd.gws2, 32); - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 32; + dispatchData.gws[2] = Align(dispatchData.gws[2], 32); + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 32; } else if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv32 && params.output.Feature().v % 32 == 0) { if (params.layoutBased || params.int8_quantization || params.broadcast) { - kd.lws0 = 1; - kd.lws1 = 32; - kd.lws2 = 1; - } else if (kd.gws0 == params.output.LogicalSize()) { - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 32; + dispatchData.lws[2] = 1; + } else if (dispatchData.gws[0] == params.output.LogicalSize()) { + dispatchData.lws = local; } else { - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 32; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 32; } } else { - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.lws[0] = local[0]; + dispatchData.lws[1] = local[1]; + dispatchData.lws[2] = local[2]; } - return kd; + return dispatchData; } KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const { @@ -604,12 +599,12 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); - DispatchData runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto& kernel = kd.kernels[0]; - kernel.workGroups.global = {runInfo.gws0, 
runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp index 4abb291..0f5f71e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp @@ -21,26 +21,6 @@ namespace kernel_selector { -namespace { -std::shared_ptr GetJit_GetIndexForDataLayout(std::string jitName, - std::string prefix, - DataLayout dataLayout) { - std::string jitValue; - switch (dataLayout) { - case DataLayout::byxf: - jitValue += "GET_DATA_INDEX("; - break; - case DataLayout::fs_b_yx_fsv32: - jitValue += "GET_DATA_FS_B_YX_FSV32_INDEX("; - break; - default: - throw std::runtime_error("incorrect data_layout"); - } - jitValue += prefix + ",b,f,y,x)"; - - return MakeJitConstant(jitName, jitValue); -} -} // namespace // TODO: [blocked_formats] does fp32 work well with kernel? 
ParamsKey EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32::GetSupportedKey() const { ParamsKey k; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embedding_bag/embedding_bag_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embedding_bag/embedding_bag_kernel_ref.cpp index 333ecc2..8c73282 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embedding_bag/embedding_bag_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embedding_bag/embedding_bag_kernel_ref.cpp @@ -43,23 +43,14 @@ JitConstants EmbeddingBagKernelRef::GetJitConstants(const embedding_bag_params& } CommonDispatchData EmbeddingBagKernelRef::SetDefault(const embedding_bag_params& params) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = { params.output.Batch().v, - params.output.Feature().v, - params.output.Y().v * params.output.X().v }; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Y().v * params.output.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { @@ -70,7 +61,7 @@ KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const op return {}; } - auto runInfo = SetDefault(newParams); + auto dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); @@ -78,7 +69,7 
@@ KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const op auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp index f3c3e7c..47083f7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp @@ -53,23 +53,14 @@ JitConstants ExtractImagePatchesKernelBase::GetJitConstants(const extract_image_ } ExtractImagePatchesKernelBase::DispatchData ExtractImagePatchesKernelBase::SetDefault(const extract_image_patches_params& params) const { - DispatchData kd; + DispatchData dispatchData; - std::vector global = { params.output.Batch().v, - params.output.Feature().v, - params.output.Y().v * params.output.X().v }; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Y().v * params.output.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& params, @@ -81,7 +72,7 @@ KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& pa const auto& prim_params = static_cast(params); - auto run_info = SetDefault(prim_params); + auto dispatchData = SetDefault(prim_params); KernelData 
kd = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -89,7 +80,7 @@ KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& pa auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = estimated_time; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.h index 33097aa..c3ad259 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.h @@ -24,7 +24,7 @@ public: virtual ~FullyConnectedBlockKernelBase() {} protected: - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; // how many batches will a single work item compute virtual size_t GetBatchesPerWorkItem(const fully_connected_params& params) const; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp index 9617e45..234e027 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp 
@@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,18 +35,16 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params, int) const { DispatchData dispatchData; - dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; // Determine global work sizes. - dispatchData.gws0 = params.output.LogicalSize(); - dispatchData.gws1 = dispatchData.gws2 = 1; + dispatchData.gws = { params.output.LogicalSize(), 1, 1 }; // Find largest positive local work size that is divider for global work size. - dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast(1)), static_cast(32)); - while (dispatchData.gws0 % dispatchData.lws0 != 0) { - --dispatchData.lws0; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - dispatchData.lws1 = dispatchData.lws2 = 1; + dispatchData.lws[1] = dispatchData.lws[2] = 1; return dispatchData; } @@ -99,8 +97,8 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params ¶ms, auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options); - const DispatchData runInfo = SetDefault(newParams, autoTuneIndex); - auto cldnn_jit = GetJitConstants(newParams, runInfo); + const DispatchData dispatchData = SetDefault(newParams, autoTuneIndex); + auto cldnn_jit = GetJitConstants(newParams, dispatchData); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); uint32_t fused_deps_total = 0; @@ -112,7 +110,7 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params ¶ms, auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, 
+ dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h index f732cf8..b3da6c9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h @@ -63,7 +63,7 @@ public: const int autoTuneIndex = -1) const; protected: - virtual JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const; + virtual JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const; virtual DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const; KernelsData GetCommonKernelsData(const Params ¶ms, const optional_params &options, @@ -74,7 +74,7 @@ protected: int autoTuneIndex = -1) const; // Fused ops - virtual JitConstants GetFusedPrimitivesJitConstants(const fully_connected_params& params, const DispatchData& kd) const; + virtual JitConstants GetFusedPrimitivesJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const; Datatype GetActivationType(const fully_connected_params& params) const; // --Fused ops diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp index 05e17cc..4fdeed2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp @@ -39,30 +39,22 @@ ParamsKey FullyConnected_bf_io_GEMM::GetSupportedKey() const { FullyConnected_bf_io_GEMM::DispatchData FullyConnected_bf_io_GEMM::SetDefault(const fully_connected_params& params, int autoTuneIndex) const { - auto runInfo = Parent::SetDefault(params, autoTuneIndex); + auto dispatchData = Parent::SetDefault(params, autoTuneIndex); const uint32_t localWorkSizeX = 64; const uint32_t globalWorkSizeX = localWorkSizeX; - std::vector global = {globalWorkSizeX, params.output.Feature().v, params.output.Batch().v}; - std::vector local = {localWorkSizeX, 1, 1}; + dispatchData.gws = { globalWorkSizeX, params.output.Feature().v, 1 }; + dispatchData.lws = { localWorkSizeX, 1, 1 }; - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = 1; + dispatchData.efficiency = FORCE_PRIORITY_6; - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = 1; - - runInfo.efficiency = FORCE_PRIORITY_6; - - return runInfo; + return dispatchData; } JitConstants FullyConnected_bf_io_GEMM::GetJitConstants(const fully_connected_params& params, - const DispatchData& kd) const { - auto jit = Parent::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); if (params.inputs[0].GetDType() == Datatype::F16) { jit.AddConstant(MakeJitConstant("__fc_f16", "")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h index 162950b..edfd5bd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h @@ -29,6 +29,6 @@ public: protected: DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp index 7b0d270..aaa22fd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp @@ -35,27 +35,28 @@ ParamsKey FullyConnected_bf_io_input_spatial::GetSupportedKey() const { FullyConnected_bf_io_input_spatial::DispatchData FullyConnected_bf_io_input_spatial::SetDefault( const fully_connected_params& arg, int) const { - auto kd = FullyConnectedKernelBase::SetDefault(arg); + auto dispatchData = FullyConnectedKernelBase::SetDefault(arg); - kd.gws0 = Align(arg.output.LogicalSize() / arg.inputs[0].Batch().v, 16); - kd.gws1 = arg.inputs[0].Batch().v; - kd.gws2 = 1; - kd.lws0 = 16; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.gws[0] = Align(arg.output.LogicalSize() / arg.inputs[0].Batch().v, 16); + dispatchData.gws[1] = arg.inputs[0].Batch().v; + dispatchData.gws[2] = 1; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 
1; + + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; const auto& input = arg.inputs[0]; const auto& output = arg.output; if (input.Batch().v == 1 && output.Batch().v == 1) { if ((input.LogicalSize() / output.Batch().v >= 4096) && (output.Feature().v >= 4096)) { - kd.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; } } - return kd; + return dispatchData; } bool FullyConnected_bf_io_input_spatial::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp index cd7bbcc..5caf4eb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp @@ -32,8 +32,8 @@ ParamsKey FullyConnected_bf_io_ref::GetSupportedKey() const { return k; } -JitConstants FullyConnected_bf_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants FullyConnected_bf_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h index 331ccf5..6ce6621 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h @@ -31,6 +31,6 @@ protected: std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 0101ace..858c43b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -182,7 +182,7 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, .Case(tune_params(16, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, AGE_BASED)) .Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, AGE_BASED)); } - + if (dtype == Datatype::F32) { // tune_params(tile_b, tile_ofm, tile_ifm, tile_k, dispatch_bsv, dispatch_fsv, exec_options) selector.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 1, 16, 2, AGE_BASED)) @@ -195,17 +195,17 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, selector.Case([&](const fully_connected_params&) -> tune_params { tune_params result(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, DEFAULT); - + while (batch % result.tile_b != 0) result.tile_b--; - + result.dispatch_bsv = 16; while (batch % (result.tile_b * 
result.dispatch_bsv) != 0) result.dispatch_bsv--; if (result.tile_b >= 8) result.exec_options = AGE_BASED; - + return result; }); @@ -214,43 +214,43 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, FullyConnected_bf_tiled::DispatchData FullyConnected_bf_tiled::SetDefault(const fully_connected_params& params, int autoTuneIndex) const { - auto runInfo = Parent::SetDefault(params); + auto dispatchData = Parent::SetDefault(params); auto tparams = GetAutoTuneParams(params, autoTuneIndex); size_t feature_threads = CeilDiv(params.output.Feature().v, tparams.tile_ofm * simd); size_t batch_threads = params.output.Batch().v / tparams.tile_b; - runInfo.gws0 = feature_threads * batch_threads * simd; - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = feature_threads * batch_threads * simd; + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = simd; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = simd; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - runInfo.tile_m = tparams.tile_b; - runInfo.tile_n = tparams.tile_ofm; - runInfo.tile_mk = tparams.tile_ifm; - runInfo.tile_nk = tparams.tile_k; - runInfo.tile_ms = tparams.dispatch_bsv; - runInfo.tile_ns = tparams.dispatch_fsv; + dispatchData.tile_m = tparams.tile_b; + dispatchData.tile_n = tparams.tile_ofm; + dispatchData.tile_mk = tparams.tile_ifm; + dispatchData.tile_nk = tparams.tile_k; + dispatchData.tile_ms = tparams.dispatch_bsv; + dispatchData.tile_ns = tparams.dispatch_fsv; - return runInfo; + return dispatchData; } -JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstant(MakeJitConstant("SIMD", 
simd)); - jit.AddConstant(MakeJitConstant("TILE_B", kd.tile_m)); - jit.AddConstant(MakeJitConstant("TILE_OFM", kd.tile_n)); - jit.AddConstant(MakeJitConstant("TILE_IFM", kd.tile_mk)); - jit.AddConstant(MakeJitConstant("TILE_K", kd.tile_nk)); - jit.AddConstant(MakeJitConstant("TILE_K_OFM", kd.tile_nk * kd.tile_n)); - jit.AddConstant(MakeJitConstant("DISPATCH_BSV", kd.tile_ms)); - jit.AddConstant(MakeJitConstant("DISPATCH_FSV", kd.tile_ns)); + jit.AddConstant(MakeJitConstant("TILE_B", dispatchData.tile_m)); + jit.AddConstant(MakeJitConstant("TILE_OFM", dispatchData.tile_n)); + jit.AddConstant(MakeJitConstant("TILE_IFM", dispatchData.tile_mk)); + jit.AddConstant(MakeJitConstant("TILE_K", dispatchData.tile_nk)); + jit.AddConstant(MakeJitConstant("TILE_K_OFM", dispatchData.tile_nk * dispatchData.tile_n)); + jit.AddConstant(MakeJitConstant("DISPATCH_BSV", dispatchData.tile_ms)); + jit.AddConstant(MakeJitConstant("DISPATCH_FSV", dispatchData.tile_ns)); - jit.Merge(MakeConstantLoopUnrollJitConstants(kd.tile_m)); + jit.Merge(MakeConstantLoopUnrollJitConstants(dispatchData.tile_m)); bool realign_fp16_offset = params.inputs[0].GetDType() == Datatype::F16 && params.output.GetFirstElementOffset() % 2 != 0; jit.AddConstant(MakeJitConstant("REALIGN_FP16_OFFSET", realign_fp16_offset)); @@ -262,14 +262,14 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para if (!params.fused_ops.empty()) { auto boundary_check = BoundaryCheck::DISABLED; - if (params.output.Feature().v % (kd.tile_n * simd) != 0) + if (params.output.Feature().v % (dispatchData.tile_n * simd) != 0) boundary_check = BoundaryCheck::ENABLED; FusedOpsConfiguration conf = { "", {"(out_b + bi)", "out_f", "0", "0"}, "activated[bi]", activation_dt, - kd.tile_n, + dispatchData.tile_n, LoadType::LT_ALIGNED_READ, boundary_check, IndexType::TENSOR_COORD, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.h index 72d0e77..e795165 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_tiled.h @@ -68,7 +68,7 @@ protected: FusedOpType::SCALE, FusedOpType::QUANTIZE }; } - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& params, const optional_params& options) const override; tune_params GetAutoTuneParams(const fully_connected_params& params, int idx = -1) const; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp index b6db4b9..4937335 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -48,25 +48,17 @@ ParamsKey FullyConnected_bfyx_Ref::GetSupportedKey() const { FullyConnected_bfyx_Ref::DispatchData FullyConnected_bfyx_Ref::SetDefault(const fully_connected_params& params, int) const { - auto runInfo = Parent::SetDefault(params); + auto dispatchData = Parent::SetDefault(params); - std::vector global = {params.output.Feature().v, params.output.Batch().v}; - std::vector local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { params.output.Feature().v, params.output.Batch().v, 1 }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = 1; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = 1; - - return runInfo; + return dispatchData; } JitConstants FullyConnected_bfyx_Ref::GetJitConstants(const fully_connected_params& params, - const FullyConnectedKernelBase::DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); + const FullyConnectedKernelBase::DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); Datatype accumulator_dt; Datatype activation_dt; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h index 2965a0d..e47bb2f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h @@ -36,6 +36,6 @@ protected: FusedOpType::ACTIVATION }; } bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const 
override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp index a89e55c..beda9cb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,15 +34,15 @@ ParamsKey FullyConnected_bs_f_bsv16_af8::GetSupportedKey() const { FullyConnected_bs_f_bsv16_af8::DispatchData FullyConnected_bs_f_bsv16_af8::SetDefault(const fully_connected_params& arg, int) const { - auto kd = FullyConnectedBlockKernelBase::SetDefault(arg); + auto dispatchData = FullyConnectedBlockKernelBase::SetDefault(arg); size_t groups_per_batches = GetLocalGroupsSize(arg); - kd.gws0 = Align(arg.output.LogicalSize() / (GetBatchesPerWorkItem(arg) * groups_per_batches), 16); - kd.gws1 = groups_per_batches; - kd.lws0 = 16; - kd.lws1 = 1; + dispatchData.gws[0] = Align(arg.output.LogicalSize() / (GetBatchesPerWorkItem(arg) * groups_per_batches), 16); + dispatchData.gws[1] = groups_per_batches; + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 1; - return kd; + return dispatchData; } static bool check_input_layout(const DataTensor& t) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp index 49160b5..f20dbe7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,12 +34,12 @@ ParamsKey FullyConnected_bs_f_bsv16_b1::GetSupportedKey() const { JitConstants FullyConnected_bs_f_bsv16_b1::GetJitConstants( const fully_connected_params& params, - const FullyConnectedKernelBase::DispatchData& run_info) const { - auto& d = static_cast(run_info); - auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, run_info); + const FullyConnectedKernelBase::DispatchData& dispatchData) const { + auto& d = static_cast(dispatchData); + auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, dispatchData); cldnn_jit.AddConstants({ - MakeJitConstant("SUB_GROUP_SIZE", run_info.lws0), - MakeJitConstant("WORK_ITEMS_PER_BATCH", run_info.gws1), + MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]), + MakeJitConstant("WORK_ITEMS_PER_BATCH", dispatchData.gws[1]), MakeJitConstant("UNIT_BYTE_SIZE", d.unit_byte_size), MakeJitConstant("CHUNK_TYPE", d.chunk_type), @@ -56,11 +56,11 @@ JitConstants FullyConnected_bs_f_bsv16_b1::GetJitConstants( FullyConnected_bs_f_bsv16_b1::DispatchData FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg, int) const { - DispatchData run_info = FullyConnectedKernelBase::SetDefault(arg); + DispatchData dispatchData = FullyConnectedKernelBase::SetDefault(arg); // Properties of chunk and unit. 
const char* chunk_type = "uint"; - const uint32_t unit_byte_size = run_info.fp16UnitUsed ? sizeof(short) : sizeof(float); + const uint32_t unit_byte_size = BytesPerElement(arg.inputs[0].GetDType()); constexpr uint32_t chunk_byte_size = sizeof(uint32_t); constexpr uint32_t sub_group_size = 16; const uint32_t units_per_chunk = chunk_byte_size / unit_byte_size; @@ -73,23 +73,23 @@ FullyConnected_bs_f_bsv16_b1::DispatchData FullyConnected_bs_f_bsv16_b1::SetDefa const auto response_size = arg.output.Feature().v; auto rg_count = CeilDiv(response_size, responses_per_sg_exec); - run_info.lws0 = sub_group_size; + dispatchData.lws[0] = sub_group_size; // Number of work items needed to process all response groups. - run_info.gws0 = rg_count * sub_group_size; - run_info.lws1 = run_info.lws2 = 1; - run_info.gws1 = run_info.gws2 = 1; + dispatchData.gws[0] = rg_count * sub_group_size; + dispatchData.lws[1] = dispatchData.lws[2] = 1; + dispatchData.gws[1] = dispatchData.gws[2] = 1; - run_info.unit_byte_size = unit_byte_size; - run_info.chunk_type = chunk_type; - run_info.chunk_byte_size = chunk_byte_size; - run_info.units_per_chunk = units_per_chunk; - run_info.bytes_per_sg_read = sub_group_size * chunk_byte_size; - run_info.units_per_sg_read = units_per_sg_read; - run_info.responses_per_sg_exec = responses_per_sg_exec; - run_info.in_chunk_prefetch_size = 2; - run_info.filter_chunk_prefetch_size = responses_per_sg_exec; + dispatchData.unit_byte_size = unit_byte_size; + dispatchData.chunk_type = chunk_type; + dispatchData.chunk_byte_size = chunk_byte_size; + dispatchData.units_per_chunk = units_per_chunk; + dispatchData.bytes_per_sg_read = sub_group_size * chunk_byte_size; + dispatchData.units_per_sg_read = units_per_sg_read; + dispatchData.responses_per_sg_exec = responses_per_sg_exec; + dispatchData.in_chunk_prefetch_size = 2; + dispatchData.filter_chunk_prefetch_size = responses_per_sg_exec; - return run_info; + return dispatchData; } KernelsData 
FullyConnected_bs_f_bsv16_b1::GetKernelsData(const Params& params, const optional_params& optParams) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h index 03422b8..30e3830 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h @@ -28,7 +28,7 @@ public: protected: JitConstants GetJitConstants(const fully_connected_params& params, - const FullyConnectedKernelBase::DispatchData& kd) const override; + const FullyConnectedKernelBase::DispatchData& dispatchData) const override; DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp index 60e879a..ebc6da8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -36,17 +36,17 @@ ParamsKey FullyConnected_bs_f_bsv8_af8::GetSupportedKey() const { FullyConnected_bs_f_bsv8_af8::DispatchData FullyConnected_bs_f_bsv8_af8::SetDefault(const fully_connected_params& arg, int) const { - auto kd = FullyConnectedBlockKernelBase::SetDefault(arg); + auto dispatchData = FullyConnectedBlockKernelBase::SetDefault(arg); size_t groups_per_batches = GetLocalGroupsSize(arg); - kd.gws0 = + dispatchData.gws[0] = Align(arg.output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8); - kd.gws1 = groups_per_batches; - kd.lws0 = 8; - kd.lws1 = 1; + dispatchData.gws[1] = groups_per_batches; + dispatchData.lws[0] = 8; + dispatchData.lws[1] = 1; - return kd; + return dispatchData; } static bool check_input_layout(const DataTensor& t) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp index 62ea7f2..77e720d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -48,18 +48,18 @@ size_t FullyConnected_fb_io_b8_f8::GetBatchesPerWorkItem(const fully_connected_p FullyConnected_fb_io_b8_f8::DispatchData FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg, int) const { - auto kd = FullyConnectedBlockKernelBase::SetDefault(arg); + auto dispatchData = FullyConnectedBlockKernelBase::SetDefault(arg); const auto& output = arg.output; size_t groups_per_batches = GetLocalGroupsSize(arg); - kd.gws0 = + dispatchData.gws[0] = Align(output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8); - kd.gws1 = groups_per_batches; - kd.lws0 = 8; - kd.lws1 = 1; + dispatchData.gws[1] = groups_per_batches; + dispatchData.lws[0] = 8; + dispatchData.lws[1] = 1; - return kd; + return dispatchData; } bool FullyConnected_fb_io_b8_f8::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp index 4af19b2..2ec01a1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -33,7 +33,7 @@ ParamsKey FullyConnected_fb_io_block::GetSupportedKey() const { FullyConnected_fb_io_block::DispatchData FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg, int) const { - auto kd = FullyConnectedKernelBase::SetDefault(arg); + auto dispatchData = FullyConnectedKernelBase::SetDefault(arg); const auto& output = arg.output; auto batch_size = output.Batch().v; @@ -50,37 +50,37 @@ FullyConnected_fb_io_block::DispatchData FullyConnected_fb_io_block::SetDefault( // for at least one input data set from batch. auto rg_count = CeilDiv(response_size, units_per_sg_read); - kd.lws0 = sub_group_size; + dispatchData.lws[0] = sub_group_size; // Number of work items needed to process all response groups. - kd.gws0 = rg_count * sub_group_size; - kd.lws1 = 1; - kd.gws1 = batch_size / units_per_sg_read; - - kd.unit_byte_size = unit_byte_size; - kd.chunk_type = chunk_type; - kd.chunk_byte_size = chunk_byte_size; - kd.units_per_chunk = units_per_chunk; - kd.bytes_per_sg_read = sub_group_size * chunk_byte_size; - kd.units_per_sg_read = units_per_sg_read; - kd.rg_count = (uint32_t)rg_count; - kd.last_rg_size = response_size % units_per_sg_read; - return kd; + dispatchData.gws[0] = rg_count * sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.gws[1] = batch_size / units_per_sg_read; + + dispatchData.unit_byte_size = unit_byte_size; + dispatchData.chunk_type = chunk_type; + dispatchData.chunk_byte_size = chunk_byte_size; + dispatchData.units_per_chunk = units_per_chunk; + dispatchData.bytes_per_sg_read = sub_group_size * chunk_byte_size; + dispatchData.units_per_sg_read = units_per_sg_read; + dispatchData.rg_count = (uint32_t)rg_count; + dispatchData.last_rg_size = response_size % units_per_sg_read; + return dispatchData; } JitConstants FullyConnected_fb_io_block::GetJitConstants(const fully_connected_params& params, - const FullyConnectedKernelBase::DispatchData& run_info) const { - auto cldnn_jit = 
FullyConnectedKernelBase::GetJitConstants(params, run_info); + const FullyConnectedKernelBase::DispatchData& dispatchData) const { + auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, dispatchData); cldnn_jit.AddConstants({ - MakeJitConstant("SUB_GROUP_SIZE", run_info.lws0), - MakeJitConstant("WORK_ITEMS_PER_BATCH", run_info.gws1), - MakeJitConstant("UNIT_BYTE_SIZE", run_info.unit_byte_size), - MakeJitConstant("CHUNK_TYPE", run_info.chunk_type), - MakeJitConstant("CHUNK_BYTE_SIZE", run_info.chunk_byte_size), - MakeJitConstant("UNITS_PER_CHUNK", run_info.units_per_chunk), - MakeJitConstant("BYTES_PER_SG_READ", run_info.bytes_per_sg_read), - MakeJitConstant("UNITS_PER_SG_READ", run_info.units_per_sg_read), - MakeJitConstant("RG_COUNT", run_info.rg_count), - MakeJitConstant("LAST_RG_SIZE", run_info.last_rg_size), + MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[0]), + MakeJitConstant("WORK_ITEMS_PER_BATCH", dispatchData.gws[1]), + MakeJitConstant("UNIT_BYTE_SIZE", dispatchData.unit_byte_size), + MakeJitConstant("CHUNK_TYPE", dispatchData.chunk_type), + MakeJitConstant("CHUNK_BYTE_SIZE", dispatchData.chunk_byte_size), + MakeJitConstant("UNITS_PER_CHUNK", dispatchData.units_per_chunk), + MakeJitConstant("BYTES_PER_SG_READ", dispatchData.bytes_per_sg_read), + MakeJitConstant("UNITS_PER_SG_READ", dispatchData.units_per_sg_read), + MakeJitConstant("RG_COUNT", dispatchData.rg_count), + MakeJitConstant("LAST_RG_SIZE", dispatchData.last_rg_size), }); return cldnn_jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h index 550c9ee..8545ae5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h @@ -29,7 +29,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; JitConstants GetJitConstants(const fully_connected_params& params, - const FullyConnectedKernelBase::DispatchData& kd) const override; + const FullyConnectedKernelBase::DispatchData& dispatchData) const override; DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp index f769fde..ed8aa49 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp @@ -32,8 +32,8 @@ ParamsKey FullyConnected_fb_io_ref::GetSupportedKey() const { return k; } -JitConstants FullyConnected_fb_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants FullyConnected_fb_io_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h index ee844a7..2a45ee3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h @@ -31,6 +31,6 @@ protected: std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp index e2254ce..64cd0d8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -33,15 +33,15 @@ ParamsKey FullyConnected_fb_oi_b8_ref::GetSupportedKey() const { FullyConnected_fb_oi_b8_ref::DispatchData FullyConnected_fb_oi_b8_ref::SetDefault(const fully_connected_params& arg, int) const { - auto kd = FullyConnectedKernelBase::SetDefault(arg); + auto dispatchData = FullyConnectedKernelBase::SetDefault(arg); const auto& output = arg.output; - kd.gws0 = output.Batch().v; - kd.gws1 = output.LogicalSize() / kd.gws0; - kd.lws0 = 8; - kd.lws1 = 1; + dispatchData.gws[0] = output.Batch().v; + dispatchData.gws[1] = output.LogicalSize() / dispatchData.gws[0]; + dispatchData.lws[0] = 8; + dispatchData.lws[1] = 1; - return kd; + return dispatchData; } bool FullyConnected_fb_oi_b8_ref::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp index bcfedd6..037e536 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp @@ -33,8 +33,8 @@ ParamsKey FullyConnected_fb_oi_ref::GetSupportedKey() const { } -JitConstants FullyConnected_fb_oi_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants FullyConnected_fb_oi_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h index 1461a23..3780103 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h @@ -31,6 +31,6 @@ protected: std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.cpp index 718992c..6fba66a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.cpp @@ -44,28 +44,28 @@ ParamsKey FullyConnected_fs_byx_fsv32::GetSupportedKey() const { FullyConnected_fs_byx_fsv32::Parent::DispatchData FullyConnected_fs_byx_fsv32::SetDefault( const fully_connected_params& params, int autoTuneIndex) const { - auto runInfo = Parent::SetDefault(params, autoTuneIndex); + auto dispatchData = Parent::SetDefault(params, autoTuneIndex); auto blockSizeB = std::min(outputBlockSizeB, params.output.Batch().v); auto blockNumB = CeilDiv(params.output.Batch().v, blockSizeB); auto wgHeight = std::min(preferredWGHeight, blockNumB); - runInfo.gws0 = CeilDiv(params.output.Feature().v, outputBlockSizeF); - runInfo.gws1 = 
RoundUp(blockNumB, wgHeight); - runInfo.gws2 = subGroupSize; + dispatchData.gws[0] = CeilDiv(params.output.Feature().v, outputBlockSizeF); + dispatchData.gws[1] = RoundUp(blockNumB, wgHeight); + dispatchData.gws[2] = subGroupSize; - runInfo.lws0 = 1; - runInfo.lws1 = wgHeight; - runInfo.lws2 = subGroupSize; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = wgHeight; + dispatchData.lws[2] = subGroupSize; - runInfo.efficiency = FORCE_PRIORITY_5; + dispatchData.efficiency = FORCE_PRIORITY_5; - return runInfo; + return dispatchData; } JitConstants FullyConnected_fs_byx_fsv32::GetJitConstants(const fully_connected_params& params, - const DispatchData& kd) const { - auto jit = Parent::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); auto blockSizeB = std::min(outputBlockSizeB, params.output.Batch().v); auto blockNumB = CeilDiv(params.output.Batch().v, blockSizeB); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.h index 350d800..77511e9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fs_byx_fsv32.h @@ -28,6 +28,6 @@ public: protected: ParamsKey GetSupportedKey() const override; DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp index ded8ebb..753916c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp @@ -55,17 +55,17 @@ FullyConnectedKernelIMAD::Parent::DispatchData FullyConnectedKernelIMAD::SetDefa int) const { const int simdSize = 16; - auto runInfo = Parent::SetDefault(params); + auto dispatchData = Parent::SetDefault(params); - runInfo.gws0 = RoundUp(params.output.Feature().v, simdSize); - runInfo.gws1 = params.output.Batch().v; - runInfo.gws2 = 1; + dispatchData.gws[0] = RoundUp(params.output.Feature().v, simdSize); + dispatchData.gws[1] = params.output.Batch().v; + dispatchData.gws[2] = 1; - runInfo.lws0 = simdSize; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = simdSize; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } // SetDefault bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_params& options) const { @@ -95,8 +95,8 @@ bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_par return true; } // Validate -JitConstants FullyConnectedKernelIMAD::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const { - auto jit = Parent::GetJitConstants(params, kd); +JitConstants FullyConnectedKernelIMAD::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h index 718ecc8..f0de0be 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h @@ -31,7 +31,7 @@ public: protected: bool Validate(const Params& params, const optional_params& options) const override; DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, FusedOpType::SCALE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp index b560f6e..8b2e9f7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp @@ -82,28 +82,20 @@ FullyConnectedKernelMMAD::FullyConnectedTuningData FullyConnectedKernelMMAD::Set FullyConnectedKernelMMAD::DispatchData FullyConnectedKernelMMAD::SetDefault(const fully_connected_params& params, int) const { FullyConnectedTuningData tuning_data = SetTuningParams(params); - auto runInfo = Parent::SetDefault(params); + auto dispatchData = Parent::SetDefault(params); const auto& output 
= params.output; - std::vector global = { Align(output.Feature().v, tuning_data.sub_group_size) * tuning_data.slm_div_factor, output.Batch().v, 1 }; - std::vector local = { tuning_data.work_group_size, 1, 1 }; + dispatchData.gws = { Align(output.Feature().v, tuning_data.sub_group_size) * tuning_data.slm_div_factor, output.Batch().v, 1 }; + dispatchData.lws = { tuning_data.work_group_size, 1, 1 }; - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants FullyConnectedKernelMMAD::GetJitConstants(const fully_connected_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { FullyConnectedTuningData tuning_data = SetTuningParams(params); - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); auto& input = params.inputs[0]; auto& weights = params.weights; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.h index 704b291..af7cb33 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.h @@ -36,7 +36,7 @@ public: }; protected: - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override; std::vector GetSupportedFusedOps() const override { return { 
FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp index b5d84af..49057ae 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp @@ -34,8 +34,8 @@ ParamsKey FullyConnected_yxfb_ref::GetSupportedKey() const { return k; } -JitConstants FullyConnected_yxfb_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants FullyConnected_yxfb_ref::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); FusedOpsConfiguration conf = { "", {"b", "f", "y", "x"}, "result", input_dt, 1 }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h index 60af787..dcab3ba 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h @@ -32,6 +32,6 @@ protected: std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const 
fully_connected_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp index 515e2b2..be3f08a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp @@ -109,7 +109,7 @@ bool fused_conv_eltwise_kernel_base::Validate(const Params& p, const optional_pa } JitConstants fused_conv_eltwise_kernel_base::GetJitConstants(const fused_conv_eltwise_params& params, - const DispatchData& kd) const { + const DispatchData& dispatchData) const { JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params); const auto& padding = params.conv.padding; const auto& input = params.inputs[0]; @@ -151,12 +151,12 @@ JitConstants fused_conv_eltwise_kernel_base::GetJitConstants(const fused_conv_el std::vector unrollLoopParams{params.conv.filterSize.x, params.conv.filterSize.y, params.conv.filterSize.z, - (uint32_t)kd.gemmStyle.globalWorkSizeDX, - (uint32_t)kd.gemmStyle.globalWorkSizeDY, - (uint32_t)kd.gemmStyle.globalWorkSizeDZ, - (uint32_t)kd.gemmStyle.subBlockDimM, - (uint32_t)kd.gemmStyle.subBlockDimK, - (uint32_t)kd.gemmStyle.subBlockDimN}; + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDX, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDY, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ, + (uint32_t)dispatchData.gemmStyle.subBlockDimM, + (uint32_t)dispatchData.gemmStyle.subBlockDimK, + (uint32_t)dispatchData.gemmStyle.subBlockDimN}; auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end()); @@ -166,13 +166,15 @@ JitConstants 
fused_conv_eltwise_kernel_base::GetJitConstants(const fused_conv_el return mem_consts; } -bool fused_conv_eltwise_kernel_base::CheckWorkGroups(const fused_conv_eltwise_kernel_base::DispatchData& kd) { - if (kd.gws0 == 0 || kd.gws1 == 0 || kd.gws2 == 0 || kd.lws0 == 0 || kd.lws1 == 0 || kd.lws2 == 0) { +bool fused_conv_eltwise_kernel_base::CheckWorkGroups(const fused_conv_eltwise_kernel_base::DispatchData& dispatchData) { + if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3) return false; - } - if ((kd.gws0 % kd.lws0) != 0 || (kd.gws1 % kd.lws1) != 0 || (kd.gws2 % kd.lws2) != 0) { - return false; + for (size_t i = 0; i < dispatchData.gws.size(); i++) { + if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0) + return false; + if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0) + return false; } return true; @@ -216,43 +218,34 @@ bool fused_conv_eltwise_kernel_base::CheckPitchForSplitOnly(const fused_conv_elt fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_base::SetDefault( const fused_conv_eltwise_params& params, int) const { - DispatchData kd; + DispatchData dispatchData; const auto& out = params.output; - kd.fp16UnitUsed = out.GetDType() == Datatype::F16; - std::vector global; + if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf || params.output.GetLayout() == DataLayout::bfzyx || params.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 || params.output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16) { - global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; + dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; } else { - global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v * out.Z().v }; + dispatchData.gws = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v * out.Z().v }; } - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - 
kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - kd.cldnnStyle.blockWidth = 1; - kd.cldnnStyle.blockHeight = 1; - kd.cldnnStyle.prefetch = 0; - kd.cldnnStyle.inputBlockArraySize = 0; - kd.cldnnStyle.inputBlockWidth = 0; - - kd.gemmStyle.globalWorkSizeDX = 1; - kd.gemmStyle.globalWorkSizeDY = 1; - kd.gemmStyle.globalWorkSizeDZ = 1; - kd.gemmStyle.subBlockDimK = 1; - kd.gemmStyle.subBlockDimM = 0; - kd.gemmStyle.subBlockDimN = 0; - kd.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - return kd; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + dispatchData.cldnnStyle.blockWidth = 1; + dispatchData.cldnnStyle.blockHeight = 1; + dispatchData.cldnnStyle.prefetch = 0; + dispatchData.cldnnStyle.inputBlockArraySize = 0; + dispatchData.cldnnStyle.inputBlockWidth = 0; + + dispatchData.gemmStyle.globalWorkSizeDX = 1; + dispatchData.gemmStyle.globalWorkSizeDY = 1; + dispatchData.gemmStyle.globalWorkSizeDZ = 1; + dispatchData.gemmStyle.subBlockDimK = 1; + dispatchData.gemmStyle.subBlockDimM = 0; + dispatchData.gemmStyle.subBlockDimN = 0; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + return dispatchData; } KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& params, @@ -269,9 +262,9 @@ KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& p if (NeedPaddedInput()) { kd.reorderInput = CovolutionUpdateInputParams(newParams); } - DispatchData runInfo = SetDefault(newParams, autoTuneIndex); + DispatchData dispatchData = SetDefault(newParams, autoTuneIndex); - if (!CheckWorkGroups(runInfo)) { + if (!CheckWorkGroups(dispatchData)) { // Internal Error - wrong calculation of global/local work group sizes return {}; } @@ -287,13 +280,13 @@ KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& p } auto finalKernelName = GetKernelName(newParams); - auto cldnnJit = GetJitConstants(newParams, runInfo); + auto cldnnJit 
= GetJitConstants(newParams, dispatchData); auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, finalKernelName, jit, @@ -310,7 +303,7 @@ KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& p kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); } - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; kd.autoTuneIndex = autoTuneIndex; return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h index 4d1d1aa..9e69183 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h @@ -111,7 +111,7 @@ protected: virtual std::string GetKernelName(const fused_conv_eltwise_params&) const { return kernelName; } virtual bool NeedPaddedInput() const { return false; } bool Validate(const Params& p, const optional_params& o) const override; - virtual JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const; + virtual JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const; virtual DispatchData SetDefault(const fused_conv_eltwise_params& params, int autoTuneIndex = -1) const; static bool CheckWorkGroups(const DispatchData&); static bool CheckPitchForSplitOnly(const fused_conv_eltwise_params& params); diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp index de8ea67..894cdf0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp @@ -132,29 +132,29 @@ WeightsLayout fused_conv_eltwise_kernel_bfyx_1x1_opt::GetPreferreddWeightsLayout fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_1x1_opt::SetDefault( const fused_conv_eltwise_params& arg, int) const { - DispatchData runInfo = Parent::SetDefault(arg); + DispatchData dispatchData = Parent::SetDefault(arg); constexpr size_t sub_group_size = 8; - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; auto block = get_out_block_size(arg); - runInfo.gws0 = arg.output.X().v / block.out_width; - runInfo.gws1 = arg.output.Y().v / block.out_height; - runInfo.gws2 = 2 * (arg.output.Feature().v * arg.output.Batch().v) / - block.out_depth; // process 8 output channels per Workitem + dispatchData.gws[0] = arg.output.X().v / block.out_width; + dispatchData.gws[1] = arg.output.Y().v / block.out_height; + dispatchData.gws[2] = 2 * (arg.output.Feature().v * arg.output.Batch().v) / + block.out_depth; // process 8 output channels per Workitem - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 2 * sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 2 * sub_group_size; - return runInfo; + return dispatchData; } JitConstants fused_conv_eltwise_kernel_bfyx_1x1_opt::GetJitConstants(const fused_conv_eltwise_params& params, - const DispatchData& runInfo) const { - auto jit = 
Parent::GetJitConstants(params, runInfo); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); auto block = get_out_block_size(params); jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h index 3f77a72..4ad16b6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h @@ -35,8 +35,8 @@ protected: WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params &) const override; std::string GetKernelName(const fused_conv_eltwise_params& params) const override; bool NeedPaddedInput() const override { return true; } - JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp index 6f91ce2..991f209 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp @@ -54,19 +54,19 @@ ParamsKey fused_conv_eltwise_kernel_bfyx_iyxo::GetSupportedKey() const { fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_iyxo::SetDefault( const fused_conv_eltwise_params& cp, int) const { - DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(cp); + DispatchData dispatchData = fused_conv_eltwise_kernel_base::SetDefault(cp); - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; - runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 4 / 2; - runInfo.gws1 = cp.output.Y().v / 2; - runInfo.gws2 = sub_group_size; + dispatchData.gws[0] = CeilDiv(cp.output.X().v, sub_group_size) / 4 / 2; + dispatchData.gws[1] = cp.output.Y().v / 2; + dispatchData.gws[2] = sub_group_size; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = sub_group_size; - return runInfo; + return dispatchData; } bool fused_conv_eltwise_kernel_bfyx_iyxo::Validate(const Params& p, const optional_params& o) const { @@ -82,9 +82,9 @@ bool fused_conv_eltwise_kernel_bfyx_iyxo::Validate(const Params& p, const option } JitConstants fused_conv_eltwise_kernel_bfyx_iyxo::GetJitConstants(const fused_conv_eltwise_params& params, - const DispatchData& runInfo) const { - auto jit = Parent::GetJitConstants(params, runInfo); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2)); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2])); return jit; } diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h index 965a863..1dddc41 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h @@ -34,7 +34,7 @@ protected: WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params&) const override { return WeightsLayout::iyxo; } - JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp index 3f4582e..7b34ea5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp @@ -148,7 +148,6 @@ fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetAutoTuneOptions(const Params& p, option.blockWidth = 4; option.blockHeight = 3; option.prefetch = 5; - // run_info.efficiency = FORCE_PRIORITY_7; // GEMM is 
better } // if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block @@ -162,38 +161,38 @@ fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetAutoTuneOptions(const Params& p, fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::SetDefault( const fused_conv_eltwise_params& cp, int autoTuneIndex) const { - DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(cp); + DispatchData dispatchData = fused_conv_eltwise_kernel_base::SetDefault(cp); const auto of_maps = cp.output.Feature().v; const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size); - runInfo.efficiency = FORCE_PRIORITY_3; + dispatchData.efficiency = FORCE_PRIORITY_3; auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex); - runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth; - runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight; - runInfo.cldnnStyle.prefetch = tuneOptions.prefetch; + dispatchData.cldnnStyle.blockWidth = tuneOptions.blockWidth; + dispatchData.cldnnStyle.blockHeight = tuneOptions.blockHeight; + dispatchData.cldnnStyle.prefetch = tuneOptions.prefetch; - auto input_block_dims = get_bfyx_req_input_block_dims(runInfo.cldnnStyle.blockWidth, - runInfo.cldnnStyle.blockHeight, + auto input_block_dims = get_bfyx_req_input_block_dims(dispatchData.cldnnStyle.blockWidth, + dispatchData.cldnnStyle.blockHeight, cp.conv.filterSize, cp.conv.stride, cp.conv.dilation, sub_group_size, - runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2, + cp.output.GetDType() == Datatype::F16 ? 
sub_group_size : sub_group_size / 2, sub_group_size); - runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first; - runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second; + dispatchData.cldnnStyle.inputBlockArraySize = input_block_dims.first; + dispatchData.cldnnStyle.inputBlockWidth = input_block_dims.second; - runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth); - runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight); - runInfo.gws2 = of_threads_per_batch * cp.output.Batch().v; + dispatchData.gws[0] = CeilDiv(cp.output.X().v, dispatchData.cldnnStyle.blockWidth); + dispatchData.gws[1] = CeilDiv(cp.output.Y().v, dispatchData.cldnnStyle.blockHeight); + dispatchData.gws[2] = of_threads_per_batch * cp.output.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = sub_group_size; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = sub_group_size; - return runInfo; + return dispatchData; } bool fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::Validate(const Params& p, const optional_params& o) const { @@ -205,19 +204,19 @@ bool fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::Validate(const Params& p, cons } JitConstants fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetJitConstants(const fused_conv_eltwise_params& params, - const DispatchData& runInfo) const { + const DispatchData& dispatchData) const { const auto of_maps = params.output.Feature().v; const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size); size_t leftovers = of_threads_per_batch - of_maps; - auto jit = Parent::GetJitConstants(params, runInfo); + auto jit = Parent::GetJitConstants(params, dispatchData); - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth)); - jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight)); - jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", 
runInfo.cldnnStyle.inputBlockArraySize)); - jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth)); - jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", dispatchData.lws[2])); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", dispatchData.cldnnStyle.blockWidth)); + jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", dispatchData.cldnnStyle.blockHeight)); + jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", dispatchData.cldnnStyle.inputBlockArraySize)); + jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth)); + jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch)); if (leftovers) { jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h index 3bda6e1..f4179f3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h @@ -33,7 +33,7 @@ public: protected: WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params &) const override; - JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& p, const optional_params& o) const override; bool NeedPaddedInput() const override { return true; } DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = 
-1) const override; @@ -50,4 +50,4 @@ private: std::vector autoTuneOptions = {}; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp index 056f439..51a1b75 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp @@ -76,7 +76,7 @@ size_t GetOfmPerWorkitem(Datatype dataType) { fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_yxfb_yxio_b16::SetDefault( const fused_conv_eltwise_params& arg, int) const { - DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(arg); + DispatchData dispatchData = fused_conv_eltwise_kernel_base::SetDefault(arg); const auto filter_ofm_num = arg.weights.OFM().v; const auto batch_size = arg.output.Batch().v; @@ -86,15 +86,15 @@ fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_yxfb_yxio const size_t ofmPerWorkItem = GetOfmPerWorkitem(arg.inputs[0].GetDType()); if (arg.inputs[0].GetDType() == Datatype::F16) { - runInfo.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; } else { - runInfo.efficiency = FORCE_PRIORITY_9; + dispatchData.efficiency = FORCE_PRIORITY_9; } - runInfo.lws0 = min_lws; - runInfo.gws0 = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem); + dispatchData.lws[0] = min_lws; + dispatchData.gws[0] = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem); - return runInfo; + return dispatchData; } bool fused_conv_eltwise_kernel_yxfb_yxio_b16::Validate(const Params& p, const 
optional_params& o) const { @@ -138,10 +138,10 @@ bool fused_conv_eltwise_kernel_yxfb_yxio_b16::Validate(const Params& p, const op } JitConstants fused_conv_eltwise_kernel_yxfb_yxio_b16::GetJitConstants(const fused_conv_eltwise_params& params, - const DispatchData& kd) const { - auto jit = Parent::GetJitConstants(params, kd); + const DispatchData& dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); - const auto local_work_group_size = kd.lws0; + const auto local_work_group_size = dispatchData.lws[0]; const auto batch_size = params.output.Batch().v; if (params.inputs[0].GetDType() == Datatype::F32) { @@ -166,7 +166,7 @@ JitConstants fused_conv_eltwise_kernel_yxfb_yxio_b16::GetJitConstants(const fuse const size_t ofmPerWorkItem = GetOfmPerWorkitem(params.inputs[0].GetDType()); jit.AddConstants({ - MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0), + MakeJitConstant("LOCAL_WORK_GROUP_SIZE", dispatchData.lws[0]), MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem), MakeJitConstant("BATCHES_PER_WORK_ITEM", batchesPerWorkItem), // how many batches will a single work item compute diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h index 3a20b49..2d9a509 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h @@ -37,7 +37,7 @@ protected: } std::string GetKernelName(const fused_conv_eltwise_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override; + 
JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& dispatchData) const override; DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp index 78f248d..7cc7bb6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp @@ -101,7 +101,7 @@ static inline std::vector GetOrder(size_t size) { } else if (size == 6) { idx_order = {"b", "f", "w", "z", "y", "x"}; } - + return idx_order; } @@ -120,7 +120,7 @@ static std::string GetDictionaryIndexOrder(const gather_params& params, size_t a for (size_t i = dictionary_dims_num; i < idx_order.size(); i++) idx_order[i] = zeroVal; - + // Fix size to inputs[0] dims size for (size_t i = 0; i < params.output.GetDims().size() - params.inputs[0].GetDims().size(); i++) idx_order.pop_back(); @@ -152,33 +152,20 @@ static std::string GetIndecesIdxOrder(const gather_params& params, size_t axis) } CommonDispatchData GatherKernelRef::SetDefault(const gather_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; const auto& output = params.output; - std::vector global; - std::vector local; - if (output.GetLayout() == DataLayout::bfyx) { - global = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v}; + dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v}; } else if (output.GetLayout() == DataLayout::bfzyx) { - global = {output.X().v, output.Y().v * output.Z().v, output.Feature().v * 
output.Batch().v}; + dispatchData.gws = {output.X().v, output.Y().v * output.Z().v, output.Feature().v * output.Batch().v}; } else { - global = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; } - local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - return runInfo; + return dispatchData; } JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const { @@ -220,14 +207,14 @@ KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional KernelData kd = KernelData::Default(params); gather_params& newParams = *static_cast(kd.params.get()); - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params)); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params)); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp 
index 1042910..4f3a2fc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp @@ -23,49 +23,40 @@ JitConstants GatherTreeKernelBase::GetJitConstants(const gather_tree_params & pa } GatherTreeKernelBase::DispatchData GatherTreeKernelBase::SetDefault(const gather_tree_params & params) const { - std::vector global{ - params.output.Y().v, // beam - params.output.Feature().v, // batch - 1 - }; - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + DispatchData dispatchData; /* b -> time f -> batch y -> beam */ - DispatchData data; - data.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - data.gws0 = global[0]; - data.gws1 = global[1]; - data.gws2 = global[2]; - data.lws0 = local[0]; - data.lws1 = local[1]; - data.lws2 = local[2]; - return data; + dispatchData.gws = { params.output.Y().v, // beam + params.output.Feature().v, // batch + 1 }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + return dispatchData; } KernelsData GatherTreeKernelBase::GetCommonKernelsData(const Params& params, - const optional_params& options, - float estimated_time) const { + const optional_params& options, + float estimated_time) const { assert(params.GetType() == KernelType::GATHER_TREE); const auto& gt_params = static_cast(params); - auto run_info = SetDefault(gt_params); + auto dispatchData = SetDefault(gt_params); auto kernel_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(gt_params); auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); FillCLKernelData(kernel_data.kernels[0], - run_info, - params.engineInfo, - kernelName, - jit, - entry_point, - DEFAULT, - false, - false, - static_cast(gt_params.inputs.size())); + dispatchData, + 
params.engineInfo, + kernelName, + jit, + entry_point, + DEFAULT, + false, + false, + static_cast(gt_params.inputs.size())); kernel_data.estimatedTime = estimated_time; return { kernel_data }; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp index 249e47f..e887763 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -36,24 +36,13 @@ JitConstants GemmKernelBase::GetJitConstants(const gemm_params& params) const { GemmKernelBase::DispatchData GemmKernelBase::SetDefault(const gemm_params& params) const { const auto& output = params.output; - DispatchData kd; - - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + DispatchData dispatchData; auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v); - std::vector global = { output.X().v, output.Y().v, total_batches }; - - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.gws = { output.X().v, output.Y().v, total_batches }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - return kd; + return dispatchData; } KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params, @@ -65,7 +54,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params, const auto& prim_params = static_cast(params); - auto 
run_info = SetDefault(prim_params); + auto dispatchData = SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -74,7 +63,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params, auto& kernel = k_data.kernels[0]; FillCLKernelData(kernel, - run_info, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h index d30d454..5df5bb0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h @@ -60,7 +60,7 @@ protected: virtual DispatchData SetDefault(const gemm_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; // Fused ops - virtual JitConstants GetFusedPrimitivesJitConstants(const gemm_params& params, const DispatchData& kd) const; + virtual JitConstants GetFusedPrimitivesJitConstants(const gemm_params& params, const DispatchData& dispatchData) const; Datatype GetActivationType(const gemm_params& params) const; // --Fused ops diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp index df5534a..537825d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp @@ -75,24 +75,15 @@ GemmKernelBase::DispatchData GemmKernelMMADint8::SetDefault(const gemm_params& p const auto& output = params.output; auto total_batches = output.LogicalSize() / 
(output.X().v * output.Y().v); - DispatchData kd; + DispatchData dispatchData; GemmTuningData td = SetTuningParams(params); - std::vector global = { Align(output.X().v, td.simd_size), - Align(output.Y().v, td.simd_size * td.tile_num) / (td.simd_size * td.tile_num), - total_batches }; + dispatchData.gws = { Align(output.X().v, td.simd_size), + Align(output.Y().v, td.simd_size * td.tile_num) / (td.simd_size * td.tile_num), + total_batches }; + dispatchData.lws = { td.simd_size, 1, 1 }; - std::vector local = { td.simd_size, 1, 1 }; - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } GemmKernelMMADint8::GemmTuningData GemmKernelMMADint8::InitGemmTuningData(const gemm_params& params) const { @@ -154,7 +145,7 @@ KernelsData GemmKernelMMADint8::GetKernelsData(const Params& params, const optio const auto& prim_params = static_cast(params); - auto run_info = GemmKernelMMADint8::SetDefault(prim_params); + auto dispatchData = GemmKernelMMADint8::SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -163,7 +154,7 @@ KernelsData GemmKernelMMADint8::GetKernelsData(const Params& params, const optio auto& kernel = k_data.kernels[0]; FillCLKernelData(kernel, - run_info, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp index 0b1f307..94d25bf 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp @@ -72,21 +72,13 @@ GemmKernelBase::DispatchData GemmKernelMMADslmInt8::SetDefault(const gemm_params 
const auto& output = params.output; auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v); - DispatchData kd; + DispatchData dispatchData; GemmTuningData td = SetTuningParams(params); - std::vector global = { td.size_n / td.pack_size, output.Y().v / td.simd_size, total_batches }; - std::vector local = { td.slm_tile_size / td.pack_size, td.slm_tile_size / td.simd_size, 1 }; + dispatchData.gws = { td.size_n / td.pack_size, output.Y().v / td.simd_size, total_batches }; + dispatchData.lws = { td.slm_tile_size / td.pack_size, td.slm_tile_size / td.simd_size, 1 }; - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } GemmKernelMMADslmInt8::GemmTuningData GemmKernelMMADslmInt8::InitGemmTuningData(const gemm_params& params) const { @@ -123,7 +115,7 @@ KernelsData GemmKernelMMADslmInt8::GetKernelsData(const Params& params, const op const auto& prim_params = static_cast(params); - auto run_info = GemmKernelMMADslmInt8::SetDefault(prim_params); + auto dispatchData = GemmKernelMMADslmInt8::SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -132,7 +124,7 @@ KernelsData GemmKernelMMADslmInt8::GetKernelsData(const Params& params, const op auto& kernel = k_data.kernels[0]; FillCLKernelData(kernel, - run_info, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp index 8ae6662..8b72216 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -40,21 +40,21 @@ ParamsKey 
GemmKernelTiledOpt::GetSupportedKey() const { GemmKernelBase::DispatchData GemmKernelTiledOpt::SetDefault(const gemm_params& params) const { const auto& output = params.output; - DispatchData kd; + DispatchData dispatchData; GemmTuningData td = SetTuningParams(params); auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v); std::vector global = { output.X().v, output.Y().v, total_batches }; - kd.gws0 = Align(global[0], td.tile_n_size) / (td.tile_n_size / td.simd_size); - kd.gws1 = Align(global[1], td.tile_m_size) / td.tile_m_size; - kd.gws2 = global[2]; + dispatchData.gws[0] = Align(global[0], td.tile_n_size) / (td.tile_n_size / td.simd_size); + dispatchData.gws[1] = Align(global[1], td.tile_m_size) / td.tile_m_size; + dispatchData.gws[2] = global[2]; - kd.lws0 = td.simd_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = td.simd_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gemm_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.cpp index a63d841..fec3194 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.cpp @@ -28,21 +28,11 @@ JitConstants GRNKernelBase::GetJitConstants(const grn_params& params, GRNKernelB GRNKernelBase::DispatchData GRNKernelBase::SetDefault(const grn_params& params) const { const auto& output = params.output; - DispatchData kd; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + DispatchData dispatchData; + dispatchData.gws = { output.Batch().v, output.Y().v, output.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, 
params.engineInfo); - std::vector global = { output.Batch().v, output.Y().v, output.X().v }; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData GRNKernelBase::GetCommonKernelsData(const Params& params, @@ -55,19 +45,17 @@ KernelsData GRNKernelBase::GetCommonKernelsData(const Params& params, const grn_params& orgParams = static_cast(params); - DispatchData runInfo; - - runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); - auto cldnn_jit = GetJitConstants(orgParams, runInfo); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.h index d960541..f17fca8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/grn/grn_kernel_base.h @@ -44,7 +44,7 @@ public: using DispatchData = CommonDispatchData; protected: - virtual JitConstants GetJitConstants(const grn_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const grn_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const grn_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; }; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp index 945524c..ce148e9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp @@ -56,7 +56,7 @@ static unsigned int GetOfmPerSimd(const lrn_params& params) { } CommonDispatchData LRNKernelAcrossChannelMultipleFeatures::SetDefault(const lrn_params& params) const { - CommonDispatchData runInfo = LRNKernelBase::SetDefault(params); + CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params); const auto& input = params.inputs[0]; unsigned int ofm_per_simd = GetOfmPerSimd(params); @@ -65,24 +65,24 @@ CommonDispatchData LRNKernelAcrossChannelMultipleFeatures::SetDefault(const lrn_ const auto& out = params.output; const unsigned int alignment = out.X().v > 16 ? 
32 : 16; - runInfo.gws0 = Align(out.X().v, alignment); - runInfo.gws1 = out.Y().v; - runInfo.gws2 = (out.Feature().v * out.Batch().v) / ofm_per_simd; + dispatchData.gws[0] = Align(out.X().v, alignment); + dispatchData.gws[1] = out.Y().v; + dispatchData.gws[2] = (out.Feature().v * out.Batch().v) / ofm_per_simd; - runInfo.lws0 = alignment; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = alignment; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } else if (input.GetLayout() == DataLayout::yxfb) { - runInfo.gws0 /= ofm_per_simd; - runInfo.lws0 = std::min(std::max(runInfo.gws0, static_cast(1)), static_cast(32)); - while (runInfo.gws0 % runInfo.lws0 != 0) { - --runInfo.lws0; + dispatchData.gws[0] /= ofm_per_simd; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } } - runInfo.efficiency = FORCE_PRIORITY_6; + dispatchData.efficiency = FORCE_PRIORITY_6; - return runInfo; + return dispatchData; } bool LRNKernelAcrossChannelMultipleFeatures::Validate(const Params& p, const optional_params& o) const { @@ -98,8 +98,8 @@ bool LRNKernelAcrossChannelMultipleFeatures::Validate(const Params& p, const opt return true; } -JitConstants LRNKernelAcrossChannelMultipleFeatures::GetJitConstants(const lrn_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants LRNKernelAcrossChannelMultipleFeatures::GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const auto& input = params.inputs[0]; const auto& input_dt = params.inputs[0].GetDType(); const auto& output = params.output; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h index 395bc90..384a2e4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h @@ -35,6 +35,6 @@ protected: FusedOpType::ACTIVATION }; } bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp index 69fd391..1746dee 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp @@ -38,32 +38,23 @@ ParamsKey LRNKernelAcrossChannelMultipleFeaturesFSV16::GetSupportedKey() const { } CommonDispatchData LRNKernelAcrossChannelMultipleFeaturesFSV16::SetDefault(const lrn_params& params) const { - CommonDispatchData runInfo = LRNKernelBase::SetDefault(params); + CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params); const auto& out = params.output; const unsigned int alignment = 16; - std::vector global = {Align(out.Feature().v, alignment), - out.X().v, - out.Y().v * out.Batch().v}; + dispatchData.gws = { Align(out.Feature().v, alignment), + out.X().v, + out.Y().v * out.Batch().v }; + dispatchData.lws = 
GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.efficiency = FORCE_PRIORITY_6; - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - runInfo.efficiency = FORCE_PRIORITY_6; - - return runInfo; + return dispatchData; } -JitConstants LRNKernelAcrossChannelMultipleFeaturesFSV16::GetJitConstants(const lrn_params& params, const DispatchData& kd) const { - JitConstants jit = LRNKernelBase::GetJitConstants(params, kd); +JitConstants LRNKernelAcrossChannelMultipleFeaturesFSV16::GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const { + JitConstants jit = LRNKernelBase::GetJitConstants(params, dispatchData); const auto& input_dt = params.inputs[0].GetDType(); if (!params.fused_ops.empty()) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h index 7827577..397f4c1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h @@ -27,6 +27,6 @@ public: private: DispatchData SetDefault(const lrn_params& params) const override; - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.cpp index a551c18..1bc2623 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.cpp @@ -36,12 +36,12 @@ ParamsKey LRNKernelAcrossChannel_b8::GetSupportedKey() const { } CommonDispatchData LRNKernelAcrossChannel_b8::SetDefault(const lrn_params& params) const { - CommonDispatchData run_info = LRNKernelBase::SetDefault(params); + CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params); - run_info.gws0 /= 8; - run_info.lws0 = 8; // gws0 is dividable by 64, so after correction it will be dividable by 8. + dispatchData.gws[0] /= 8; + dispatchData.lws[0] = 8; // gws[0] is dividable by 64, so after correction it will be dividable by 8. - return run_info; + return dispatchData; } bool LRNKernelAcrossChannel_b8::Validate(const Params& p, const optional_params& o) const { @@ -62,8 +62,8 @@ bool LRNKernelAcrossChannel_b8::Validate(const Params& p, const optional_params& return true; } -JitConstants LRNKernelAcrossChannel_b8::GetJitConstants(const lrn_params& params, const DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); +JitConstants LRNKernelAcrossChannel_b8::GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const auto& input_dt = params.inputs[0].GetDType(); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 8)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h index 9c1e298..c837a54 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h @@ -36,6 +36,6 @@ private: FusedOpType::ACTIVATION }; } bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.cpp index 693b98a..b4c1443 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.cpp @@ -40,25 +40,25 @@ ParamsKey LRNKernelAcrossChannelRef::GetSupportedKey() const { } CommonDispatchData LRNKernelAcrossChannelRef::SetDefault(const lrn_params& params) const { - CommonDispatchData runInfo = LRNKernelBase::SetDefault(params); + CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params); if (params.inputs[0].GetLayout() == DataLayout::bfyx) { const auto& out = params.output; - runInfo.gws0 = Align(out.X().v, 32); - runInfo.gws1 = out.Y().v; - runInfo.gws2 = out.Feature().v * out.Batch().v; + dispatchData.gws[0] = Align(out.X().v, 32); + dispatchData.gws[1] = out.Y().v; + dispatchData.gws[2] = out.Feature().v * out.Batch().v; - runInfo.lws0 = 32; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } - return runInfo; + return dispatchData; } JitConstants LRNKernelAcrossChannelRef::GetJitConstants(const 
lrn_params& params, - const LRNKernelBase::DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); + const LRNKernelBase::DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const auto& input_dt = params.inputs[0].GetDType(); if (!params.fused_ops.empty()) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h index fd206c5..e3832a5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h @@ -35,6 +35,6 @@ protected: FusedOpType::SCALE, FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp index 8e444f9..9f4fa16 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -32,7 +32,7 @@ bool LRNKernelBase::Validate(const Params& p, const optional_params& o) const { return true; } -JitConstants LRNKernelBase::GetJitConstants(const lrn_params& params, const LRNKernelBase::DispatchData& kd) const { +JitConstants LRNKernelBase::GetJitConstants(const lrn_params& params, const LRNKernelBase::DispatchData& /*dispatchData*/) const { JitConstants mem_consts = MakeBaseParamsJitConstants(params); const auto padding = (params.localSize - 1) / 2; @@ -57,10 +57,10 @@ JitConstants LRNKernelBase::GetJitConstants(const lrn_params& params, const LRNK auto alpha_div_by_size_abs_sqrt = std::sqrt(std::abs(alpha_div_by_size)); mem_consts.AddConstants({ - MakeJitConstant("ALPHA_AFTER_FACTORED", kd.fp16UnitUsed ? alpha_sign : alpha), - MakeJitConstant("ALPHA_DIV_BY_SIZE", kd.fp16UnitUsed ? alpha_sign : alpha_div_by_size), - MakeJitConstant("ALPHA_VAL_FACTOR", kd.fp16UnitUsed ? alpha_abs_sqrt : 1.0f), - MakeJitConstant("ALPHA_VAL_FACTOR_DIV_BY_SIZE", kd.fp16UnitUsed ? alpha_div_by_size_abs_sqrt : 1.0f), + MakeJitConstant("ALPHA_AFTER_FACTORED", params.inputs[0].GetDType() == Datatype::F16 ? alpha_sign : alpha), + MakeJitConstant("ALPHA_DIV_BY_SIZE", params.inputs[0].GetDType() == Datatype::F16 ? alpha_sign : alpha_div_by_size), + MakeJitConstant("ALPHA_VAL_FACTOR", params.inputs[0].GetDType() == Datatype::F16 ? alpha_abs_sqrt : 1.0f), + MakeJitConstant("ALPHA_VAL_FACTOR_DIV_BY_SIZE", params.inputs[0].GetDType() == Datatype::F16 ? alpha_div_by_size_abs_sqrt : 1.0f), }); return mem_consts; @@ -69,22 +69,21 @@ JitConstants LRNKernelBase::GetJitConstants(const lrn_params& params, const LRNK LRNKernelBase::DispatchData LRNKernelBase::SetDefault(const lrn_params& params) const { const auto& output = params.output; - DispatchData kd; + DispatchData dispatchData; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; // Determine global work sizes. 
- kd.gws0 = output.Batch().v * output.Feature().v; // B, F - kd.gws1 = output.X().v; // X - kd.gws2 = output.Y().v; // Y + dispatchData.gws[0] = output.Batch().v * output.Feature().v; // B, F + dispatchData.gws[1] = output.X().v; // X + dispatchData.gws[2] = output.Y().v; // Y // Find largest positive local work size that is divider for global work size. - kd.lws0 = std::min(std::max(kd.gws0, static_cast(1)), static_cast(32)); - while (kd.gws0 % kd.lws0 != 0) { - --kd.lws0; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } KernelsData LRNKernelBase::GetCommonKernelsData(const Params& params, @@ -96,17 +95,17 @@ KernelsData LRNKernelBase::GetCommonKernelsData(const Params& params, const lrn_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); - auto cldnnJit = GetJitConstants(orgParams, runInfo); + auto cldnnJit = GetJitConstants(orgParams, dispatchData); auto entryPoint = GetEntryPoint(kernelName, orgParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); auto fused_deps_total = GetFusedPrimitiveInputsCount(params); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.h index 8314e85..8b95eff 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.h @@ -61,7 
+61,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - virtual JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const; + virtual JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const; virtual DispatchData SetDefault(const lrn_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const; }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.cpp index 86ccca3..b856877 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -43,7 +43,7 @@ ParamsKey LRNKernelRef::GetSupportedKey() const { return k; } -JitConstants LRNKernelRef::GetJitConstants(const lrn_params& params, const LRNKernelRef::Parent::DispatchData& kd) const { +JitConstants LRNKernelRef::GetJitConstants(const lrn_params& params, const LRNKernelRef::Parent::DispatchData& dispatchData) const { const uint32_t round_norm_size = (params.localSize / 2) * 2 + 1; uint32_t numElement = round_norm_size * round_norm_size; const auto& input_dt = params.inputs[0].GetDType(); @@ -54,7 +54,7 @@ JitConstants LRNKernelRef::GetJitConstants(const lrn_params& params, const LRNKe const float num_element_div = 1.f / static_cast(numElement); - JitConstants jit = Parent::GetJitConstants(params, kd); + JitConstants jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstants({ MakeJitConstant("NUM_ELEMENTS_DIV", num_element_div), MakeJitConstant("GWS_BATCH", 2), @@ -71,22 +71,14 @@ JitConstants LRNKernelRef::GetJitConstants(const lrn_params& params, const LRNKe } LRNKernelRef::Parent::DispatchData LRNKernelRef::SetDefault(const lrn_params& params) const { - DispatchData kd = Parent::SetDefault(params); + DispatchData dispatchData = Parent::SetDefault(params); const auto& out = params.output; - std::vector global = {out.X().v * out.Y().v, out.Feature().v, out.Batch().v}; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { out.X().v * out.Y().v, out.Feature().v, out.Batch().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData LRNKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h index 0872feb..36be0cb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h @@ -35,6 +35,6 @@ private: FusedOpType::SCALE, FusedOpType::ACTIVATION }; } - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.cpp index 5b2f254..e3530c8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -39,9 +39,8 @@ ParamsKey LRNKernelWithinChannelByxfOpt::GetSupportedKey() const { return k; } -JitConstants LRNKernelWithinChannelByxfOpt::GetJitConstants( - const lrn_params& params, - const LRNKernelBase::DispatchData& kd) const { +JitConstants LRNKernelWithinChannelByxfOpt::GetJitConstants(const lrn_params& params, + const LRNKernelBase::DispatchData& dispatchData) const { const uint32_t round_norm_size = (params.localSize / 2) * 2 + 1; uint32_t numElement = round_norm_size * round_norm_size; const auto& input_dt = params.inputs[0].GetDType(); @@ -52,7 +51,7 @@ JitConstants LRNKernelWithinChannelByxfOpt::GetJitConstants( const float num_element_div = 1.f / static_cast(numElement); - JitConstants jit = Parent::GetJitConstants(params, kd); + JitConstants jit = Parent::GetJitConstants(params, dispatchData); jit.AddConstants({ MakeJitConstant("NUM_ELEMENTS_DIV", num_element_div), MakeJitConstant("GWS_BATCH", 2), @@ -70,22 +69,14 @@ JitConstants LRNKernelWithinChannelByxfOpt::GetJitConstants( LRNKernelWithinChannelByxfOpt::Parent::DispatchData LRNKernelWithinChannelByxfOpt::SetDefault( const lrn_params& params) const { - DispatchData kd = Parent::SetDefault(params); + DispatchData dispatchData = Parent::SetDefault(params); const auto& out = params.output; - std::vector global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, 8), out.Batch().v}; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { out.X().v * out.Y().v, CeilDiv(out.Feature().v, 8), out.Batch().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } bool LRNKernelWithinChannelByxfOpt::Validate(const Params& p, const optional_params& o) const { diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h index 9cdd64f..4ae0e54 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h @@ -37,6 +37,6 @@ private: FusedOpType::ACTIVATION }; } bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.cpp index b788ced..de4f0f5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -38,22 +38,22 @@ ParamsKey LRNKernelWithinChannel::GetSupportedKey() const { } CommonDispatchData LRNKernelWithinChannel::SetDefault(const lrn_params& params) const { - CommonDispatchData runInfo = LRNKernelBase::SetDefault(params); + CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params); - runInfo.gws0 = 128 * 128; - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = 128 * 128; + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = 128; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 128; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } JitConstants LRNKernelWithinChannel::GetJitConstants(const lrn_params& params, - const LRNKernelWithinChannel::Parent::DispatchData& kd) const { - JitConstants jit = Parent::GetJitConstants(params, kd); + const LRNKernelWithinChannel::Parent::DispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); const auto& input_dt = params.inputs[0].GetDType(); if (!params.fused_ops.empty()) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h index adaf9c3..93500a8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h @@ -36,6 +36,6 @@ private: FusedOpType::ACTIVATION }; } bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.cpp index 22e95f7..4b69db9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -38,19 +38,19 @@ ParamsKey LRNKernelWithinChannelOpt::GetSupportedKey() const { } CommonDispatchData LRNKernelWithinChannelOpt::SetDefault(const lrn_params& params) const { - CommonDispatchData runInfo = LRNKernelBase::SetDefault(params); + CommonDispatchData dispatchData = LRNKernelBase::SetDefault(params); const auto totalSize = params.inputs[0].LogicalSize(); const unsigned work_group_size = (totalSize < 128) ? 
32 : 128; - runInfo.gws0 = Align(params.inputs[0].LogicalSize(), work_group_size); - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = Align(params.inputs[0].LogicalSize(), work_group_size); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = work_group_size; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = work_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return runInfo; + return dispatchData; } bool LRNKernelWithinChannelOpt::Validate(const Params& p, const optional_params& o) const { @@ -60,9 +60,9 @@ bool LRNKernelWithinChannelOpt::Validate(const Params& p, const optional_params& return true; } -JitConstants LRNKernelWithinChannelOpt::GetJitConstants(const lrn_params& params, const LRNKernelWithinChannelOpt::Parent::DispatchData& kd) const { +JitConstants LRNKernelWithinChannelOpt::GetJitConstants(const lrn_params& params, const LRNKernelWithinChannelOpt::Parent::DispatchData& dispatchData) const { const auto& input_dt = params.inputs[0].GetDType(); - JitConstants jit = Parent::GetJitConstants(params, kd); + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { FusedOpsConfiguration conf = {"", {"batch_id", "feature_id", "y", "x"}, "lrn_result", input_dt, 1}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h index 8740055..cce68e2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h @@ -35,6 +35,6 @@ private: FusedOpType::ACTIVATION }; } bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const lrn_params& params, const 
DispatchData& kd) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp index 3768059..6088de2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -75,7 +75,7 @@ KernelsData LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params, return {}; } - DispatchData run_info; + DispatchData dispatchData; KernelData kd = KernelData::Default(params); lstm_dynamic_input_params& dlstm_params = *static_cast(kd.params.get()); @@ -83,18 +83,8 @@ KernelsData LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params, const auto& out = dlstm_params.output; auto hidden_size = out.X().v; - std::vector global = { hidden_size / simd_size, out.Batch().v * out.Y().v, out.Feature().v }; - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - run_info.gws0 = global[0]; - run_info.gws1 = global[1]; - run_info.gws2 = global[2]; - - run_info.lws0 = local[0]; - run_info.lws1 = local[1]; - run_info.lws2 = local[2]; - - run_info.fp16UnitUsed = dlstm_params.inputs[0].GetDType() == Datatype::F16; + dispatchData.gws = { hidden_size / simd_size, out.Batch().v * out.Y().v, out.Feature().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, 
params.engineInfo); bool succeed = UpdateWeightsParams(dlstm_params, options, @@ -111,8 +101,8 @@ KernelsData LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - kernel.workGroups.global = { run_info.gws0, run_info.gws1, run_info.gws2 }; - kernel.workGroups.local = { run_info.lws0, run_info.lws1, run_info.lws2 }; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); SetKernelArguments(dlstm_params, kernel); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp index aecd6e6..aea352f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -37,23 +37,14 @@ JitConstants LSTM_DynamicInputKernelBase::GetJitConstants(const lstm_dynamic_inp LSTM_DynamicInputKernelBase::DispatchData LSTM_DynamicInputKernelBase::SetDefault( const lstm_dynamic_input_params& params) { - DispatchData kd; + DispatchData dispatchData; const auto& out = params.output; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; // 4 * hidden, batch * dir, seq_len - std::vector global = {out.X().v, out.Batch().v * out.Y().v, out.Feature().v}; - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { out.X().v, out.Batch().v * out.Y().v, out.Feature().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } void kernel_selector::LSTM_DynamicInputKernelBase::SetKernelArguments(const lstm_dynamic_input_params& params, clKernelData& kernel) const { @@ -75,7 +66,7 @@ KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& para const lstm_dynamic_input_params& orgParams = static_cast(params); - auto run_info = SetDefault(orgParams); + auto dispatchData = SetDefault(orgParams); KernelData k_data = KernelData::Default(params, 1); auto cldnn_jit = GetJitConstants(orgParams); @@ -83,7 +74,7 @@ KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& para auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - kernel.workGroups.global = {run_info.gws0, run_info.gws1, run_info.gws2}; + kernel.workGroups.global = dispatchData.gws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); SetKernelArguments(orgParams, kernel); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp index 7384048..81acef8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -86,24 +86,15 @@ JitConstants LSTM_DynamicTimeloopKernelBase::GetJitConstants(const lstm_dynamic_ LSTM_DynamicTimeloopKernelBase::DispatchData LSTM_DynamicTimeloopKernelBase::SetDefault( const lstm_dynamic_timeloop_params& params) { - DispatchData kd; + DispatchData dispatchData; const auto& out = params.output; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; auto out_x_size = out.X().v; auto gws0 = out_x_size > 256 ? 
256 : out_x_size; - std::vector global = {gws0, out.Batch().v, static_cast(params.direction)}; - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { gws0, out.Batch().v, static_cast(params.direction) }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } void kernel_selector::LSTM_DynamicTimeloopKernelBase::SetKernelArguments(const lstm_dynamic_timeloop_params& params, clKernelData& kernel) const { @@ -136,7 +127,7 @@ KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& p const lstm_dynamic_timeloop_params& org_params = static_cast(params); - auto run_info = SetDefault(org_params); + auto dispatchData = SetDefault(org_params); KernelData k_data = KernelData::Default(params, 1); auto cldnn_jit = GetJitConstants(org_params); @@ -144,8 +135,8 @@ KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& p auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - kernel.workGroups.global = {run_info.gws0, run_info.gws1, run_info.gws2}; - kernel.workGroups.local = {run_info.lws0, run_info.lws1, run_info.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); SetKernelArguments(org_params, kernel); k_data.estimatedTime = estimated_time; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp index 93406e9..e6f6a86 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,32 +35,32 @@ JitConstants MaxUnpoolingKernelBase::GetJitConstants(const max_unpooling_params& MaxUnpoolingKernelBase::DispatchData MaxUnpoolingKernelBase::SetDefault(const max_unpooling_params& params) const { const auto& input = params.inputs[0]; - DispatchData kd; + DispatchData dispatchData; if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) { // Determine global work sizes. - kd.gws2 = input.Batch().v * input.Feature().v; // B, F - kd.gws0 = Align(input.X().v, 32); // X - kd.gws1 = input.Y().v; // Y + dispatchData.gws[2] = input.Batch().v * input.Feature().v; // B, F + dispatchData.gws[0] = Align(input.X().v, 32); // X + dispatchData.gws[1] = input.Y().v; // Y - kd.lws0 = 32; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } else { // Determine global work sizes. 
- kd.gws0 = input.Batch().v * input.Feature().v; // B, F - kd.gws1 = input.X().v; // X - kd.gws2 = input.Y().v; // Y + dispatchData.gws[0] = input.Batch().v * input.Feature().v; // B, F + dispatchData.gws[1] = input.X().v; // X + dispatchData.gws[2] = input.Y().v; // Y - kd.lws0 = std::min(std::max(kd.gws0, static_cast(1)), static_cast(32)); - while (kd.gws0 % kd.lws0 != 0) { - --kd.lws0; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } - return kd; + return dispatchData; } KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params, @@ -72,7 +72,7 @@ KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params, const max_unpooling_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); @@ -81,7 +81,7 @@ KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp index 80955a1..22011d6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp @@ -67,7 +67,7 @@ bool MVNKernel_b_fs_yx_fsv16_imad::Validate(const Params& p, const optional_para } MVNKernelBase::DispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefault(const mvn_params& params) const { - auto kd = Parent::SetDefault(params); + auto dispatchData = Parent::SetDefault(params); auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v; auto max_wg = params.engineInfo.maxWorkGroupSize; @@ -79,28 +79,28 @@ MVNKernelBase::DispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefault(const mvn_p auto lws = std::max(std::min(items_num, max_lws) / simd, (size_t)1) * simd; - kd.gws0 = lws; - kd.gws1 = CeilDiv(params.output.Feature().v, fsv); - kd.gws2 = params.output.Batch().v; + dispatchData.gws[0] = lws; + dispatchData.gws[1] = CeilDiv(params.output.Feature().v, fsv); + dispatchData.gws[2] = params.output.Batch().v; - kd.lws0 = lws; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = lws; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.itemsNum = 1; + dispatchData.itemsNum = 1; - return kd; + return dispatchData; } -JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& params, DispatchData kd) const { - auto jits = Parent::GetJitConstants(params, kd); +JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& params, DispatchData dispatchData) const { + auto jits = Parent::GetJitConstants(params, dispatchData); auto activation_dt = GetActivationType(params); jits.Merge(MakeTypeJitConstants(activation_dt, "MEAN")); jits.AddConstant(MakeJitConstant("SIMD", simd)); - jits.AddConstant(MakeJitConstant("LWS", kd.lws0)); - jits.AddConstant(MakeJitConstant("GWS", kd.gws0)); - jits.AddConstant(MakeJitConstant("ITEM_GROUPS", kd.itemsNum)); + jits.AddConstant(MakeJitConstant("LWS", dispatchData.lws[0])); + jits.AddConstant(MakeJitConstant("GWS", dispatchData.gws[0])); + 
jits.AddConstant(MakeJitConstant("ITEM_GROUPS", dispatchData.itemsNum)); if (!params.fused_ops.empty()) { std::vector idx_order; @@ -126,7 +126,7 @@ JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& par MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti( const mvn_params& params) const { - MultiDispatchData md; + MultiDispatchData dispatchData; auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v; auto max_wg = params.engineInfo.maxWorkGroupSize; @@ -139,43 +139,43 @@ MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::Se // TODO Check if larger number of work-groups does not provide benefit size_t item_groups = pref_work_groups; - md.item_groups = item_groups; + dispatchData.item_groups = item_groups; size_t stage1_lws = lws; - md.stage_1.gws0 = stage1_lws * item_groups; - md.stage_1.gws1 = CeilDiv(params.output.Feature().v, fsv); - md.stage_1.gws2 = params.output.Batch().v; + dispatchData.stage_1.gws[0] = stage1_lws * item_groups; + dispatchData.stage_1.gws[1] = CeilDiv(params.output.Feature().v, fsv); + dispatchData.stage_1.gws[2] = params.output.Batch().v; - md.stage_1.lws0 = stage1_lws; - md.stage_1.lws1 = 1; - md.stage_1.lws2 = 1; + dispatchData.stage_1.lws[0] = stage1_lws; + dispatchData.stage_1.lws[1] = 1; + dispatchData.stage_1.lws[2] = 1; - md.stage_1.itemsNum = item_groups; + dispatchData.stage_1.itemsNum = item_groups; size_t stage2_lws = std::max(std::min(item_groups, max_lws) / simd, (size_t)1) * simd; - md.stage_2.gws0 = stage2_lws; - md.stage_2.gws1 = CeilDiv(params.output.Feature().v, fsv); - md.stage_2.gws2 = params.output.Batch().v; + dispatchData.stage_2.gws[0] = stage2_lws; + dispatchData.stage_2.gws[1] = CeilDiv(params.output.Feature().v, fsv); + dispatchData.stage_2.gws[2] = params.output.Batch().v; - md.stage_2.lws0 = stage2_lws; - md.stage_2.lws1 = 1; - md.stage_2.lws2 = 1; + dispatchData.stage_2.lws[0] = 
stage2_lws; + dispatchData.stage_2.lws[1] = 1; + dispatchData.stage_2.lws[2] = 1; - md.stage_2.itemsNum = item_groups; + dispatchData.stage_2.itemsNum = item_groups; - md.stage_final.gws0 = std::max(items_num / simd, (size_t)1) * simd; - md.stage_final.gws1 = CeilDiv(params.output.Feature().v, fsv); - md.stage_final.gws2 = params.output.Batch().v; + dispatchData.stage_final.gws[0] = std::max(items_num / simd, (size_t)1) * simd; + dispatchData.stage_final.gws[1] = CeilDiv(params.output.Feature().v, fsv); + dispatchData.stage_final.gws[2] = params.output.Batch().v; - md.stage_final.lws0 = simd; - md.stage_final.lws1 = 1; - md.stage_final.lws2 = 1; + dispatchData.stage_final.lws[0] = simd; + dispatchData.stage_final.lws[1] = 1; + dispatchData.stage_final.lws[2] = 1; - md.stage_final.itemsNum = 1; + dispatchData.stage_final.itemsNum = 1; - return md; + return dispatchData; } KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params, @@ -187,7 +187,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par constexpr size_t intermidiate_bytes = 4; const mvn_params& orgParams = static_cast(params); - auto runInfo = SetDefaultForMulti(orgParams); + auto dispatchData = SetDefaultForMulti(orgParams); size_t kernels_num = params.mvnNormalizeVariance ? 
5 : 3; KernelData kd = KernelData::Default(params, kernels_num); @@ -195,13 +195,13 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par auto finalKernelName = GetKernelName(orgParams); { // Mean first stage - auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_1); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1); cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_1", 1)); auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo.stage_1, + dispatchData.stage_1, params.engineInfo, finalKernelName, jit, @@ -215,17 +215,17 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * - runInfo.item_groups * intermidiate_bytes); + dispatchData.item_groups * intermidiate_bytes); } { // Mean second stage - auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_2); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2); cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_2", 1)); auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[1]; FillCLKernelData(kernel, - runInfo.stage_2, + dispatchData.stage_2, params.engineInfo, finalKernelName, jit, @@ -243,13 +243,13 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par } if (params.mvnNormalizeVariance) { // Variance first stage - auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_1); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1); 
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_1", 1)); auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[2]; FillCLKernelData(kernel, - runInfo.stage_1, + dispatchData.stage_1, params.engineInfo, finalKernelName, jit, @@ -266,13 +266,13 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par } if (params.mvnNormalizeVariance) { // Variance second stage - auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_2); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2); cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_2", 1)); auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[3]; FillCLKernelData(kernel, - runInfo.stage_2, + dispatchData.stage_2, params.engineInfo, finalKernelName, jit, @@ -289,7 +289,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par intermidiate_bytes); } { // Final - auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_final); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_final); cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MAIN", 1)); cldnn_jit.AddConstant(MakeJitConstant("PRECALC_MEAN", 1)); cldnn_jit.AddConstant(MakeJitConstant("PRECALC_VARIANCE", params.mvnNormalizeVariance)); @@ -297,7 +297,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[kernels_num - 1]; FillCLKernelData(kernel, - runInfo.stage_final, + dispatchData.stage_final, params.engineInfo, finalKernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp index 38d9e99..2a1811f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp @@ -40,7 +40,7 @@ protected: bool Validate(const Params&, const optional_params&) const override; DispatchData SetDefault(const mvn_params& params) const override; - JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const mvn_params& params, DispatchData dispatchData) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp index 75ed07b..4482a18 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp @@ -45,29 +45,16 @@ JitConstants MVNKernelBase::GetJitConstants(const mvn_params& params, MVNKernelB MVNKernelBase::DispatchData MVNKernelBase::SetDefault(const mvn_params& params) const { const auto& output = params.output; - DispatchData kd; - - std::vector global(3); - - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - + DispatchData dispatchData; if (params.mvnMode == MVNMode::WITHIN_CHANNELS) { - global = {output.Batch().v, output.Feature().v, 1}; + dispatchData.gws = {output.Batch().v, output.Feature().v, 1}; } else { - global = {output.Batch().v, 1, 1}; + dispatchData.gws = {output.Batch().v, 1, 1}; } - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.lws = 
GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params, @@ -80,20 +67,18 @@ KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params, const mvn_params& orgParams = static_cast(params); - DispatchData runInfo; - - runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); auto finalKernelName = GetKernelName(orgParams); - auto cldnn_jit = GetJitConstants(orgParams, runInfo); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options); auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, finalKernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.h index f2485f1..da2e816 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.h @@ -68,7 +68,7 @@ public: protected: bool Validate(const Params&, const optional_params&) const override; - virtual JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const mvn_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const mvn_params& params) const; virtual std::string GetKernelName(const mvn_params&) const { return kernelName; } KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float 
estimated_time) const; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.cpp index c1e006b..e0207c6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -44,57 +44,55 @@ ParamsKey MVNKernelBfyxOpt::GetSupportedKey() const { } MVNKernelBfyxOpt::Parent::DispatchData MVNKernelBfyxOpt::SetDefault(const mvn_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.inputs[0]; - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - if (params.mvnMode == MVNMode::WITHIN_CHANNELS) { - kd.dataSetSize = input.X().v * input.Y().v * input.Z().v; - kd.dataSetsCount = input.Batch().v * input.Feature().v; + dispatchData.dataSetSize = input.X().v * input.Y().v * input.Z().v; + dispatchData.dataSetsCount = input.Batch().v * input.Feature().v; } else { - kd.dataSetSize = input.X().v * input.Y().v * input.Z().v * input.Feature().v; - kd.dataSetsCount = input.Batch().v; + dispatchData.dataSetSize = input.X().v * input.Y().v * input.Z().v * input.Feature().v; + dispatchData.dataSetsCount = input.Batch().v; } // start with 1 thread per data set - kd.gws0 = 1; - kd.gws1 = kd.dataSetsCount; - kd.gws2 = 1; - kd.itemsNum = kd.dataSetSize; + dispatchData.gws[0] = 1; + dispatchData.gws[1] = dispatchData.dataSetsCount; + dispatchData.gws[2] = 1; + dispatchData.itemsNum = dispatchData.dataSetSize; // We have two units of data per work item in current implementation. 
- auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float)); + auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType()); // Combining device execution and local memory restrictions to compute maximum possible LWS. auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi); - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; // Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory // reads. - while ((kd.itemsNum > 32 || kd.lws0 < kd.itemsNum) && (2 * kd.lws0 <= max_lws)) { - kd.lws0 *= 2; - kd.itemsNum /= 2; + while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) { + dispatchData.lws[0] *= 2; + dispatchData.itemsNum /= 2; } - kd.gws0 = kd.lws0; - kd.leftovers = kd.dataSetSize % kd.lws0; + dispatchData.gws[0] = dispatchData.lws[0]; + dispatchData.leftovers = dispatchData.dataSetSize % dispatchData.lws[0]; - return kd; + return dispatchData; } -JitConstants MVNKernelBfyxOpt::GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData kd) const { - auto jit = MVNKernelBase::GetJitConstants(params, kd); +JitConstants MVNKernelBfyxOpt::GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData dispatchData) const { + auto jit = MVNKernelBase::GetJitConstants(params, dispatchData); jit.AddConstants({ - MakeJitConstant("ITEMS_NUM", kd.itemsNum), - MakeJitConstant("LWS", kd.lws0), - MakeJitConstant("GWS", kd.gws0), - MakeJitConstant("DATA_SETS_COUNT", kd.dataSetsCount), - MakeJitConstant("DATA_SET_SIZE", kd.dataSetSize), - MakeJitConstant("LEFTOVERS", kd.leftovers), + MakeJitConstant("ITEMS_NUM", dispatchData.itemsNum), + MakeJitConstant("LWS", dispatchData.lws[0]), + MakeJitConstant("GWS", dispatchData.gws[0]), + MakeJitConstant("DATA_SETS_COUNT", 
dispatchData.dataSetsCount), + MakeJitConstant("DATA_SET_SIZE", dispatchData.dataSetSize), + MakeJitConstant("LEFTOVERS", dispatchData.leftovers), }); auto activation_dt = GetActivationType(params); jit.Merge(MakeTypeJitConstants(activation_dt, "ACTIVATION")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h index 8fd6561..e184a98 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h @@ -39,6 +39,6 @@ private: }; } DispatchData SetDefault(const mvn_params& params) const override; - JitConstants GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData kd) const override; + JitConstants GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp index 63a7a34..296d683 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp @@ -43,8 +43,8 @@ ParamsKey MVNKernelRef::GetSupportedKey() const { return k; } -JitConstants MVNKernelRef::GetJitConstants(const mvn_params& params, DispatchData kd) const { - auto jits = Parent::GetJitConstants(params, kd); +JitConstants MVNKernelRef::GetJitConstants(const mvn_params& params, DispatchData dispatchData) const { + auto jits = Parent::GetJitConstants(params, dispatchData); auto activation_dt = GetActivationType(params); jits.Merge(MakeTypeJitConstants(activation_dt, "ACTIVATION")); diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h index 5a3f4e8..24f162c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h @@ -30,7 +30,7 @@ public: ParamsKey GetSupportedKey() const override; protected: - JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const mvn_params& params, DispatchData dispatchData) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp index 2f1d5ea..2ce61fe 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp @@ -42,29 +42,16 @@ JitConstants NormalizeKernelBase::GetJitConstants(const normalize_params& np) co NormalizeKernelBase::DispatchData NormalizeKernelBase::SetDefault(const normalize_params& params) const { const auto& output = params.output; - DispatchData kd; - - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - - std::vector global(3); - + DispatchData dispatchData; if (params.normMode == NormalizeMode::WITHIN_SPATIAL) { - global = {output.X().v, output.Y().v, output.Batch().v}; + dispatchData.gws = {output.X().v, output.Y().v, output.Batch().v}; } else { - global = {output.Batch().v, 1, 1}; + dispatchData.gws = {output.Batch().v, 1, 1}; } - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + 
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params, @@ -76,9 +63,7 @@ KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params, const normalize_params& orgParams = static_cast(params); - DispatchData runInfo; - - runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); @@ -88,7 +73,7 @@ KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params, auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp index 9a5482d..bd2e448 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -33,27 +33,15 @@ JitConstants OneHotKernelBase::GetJitConstants(const one_hot_params& params) con OneHotKernelBase::DispatchData OneHotKernelBase::SetDefault(const one_hot_params& params) { const auto& input = params.inputs[0]; - DispatchData kd; - - kd.fp16UnitUsed = input.GetDType() == Datatype::F16; - - std::vector global{input.Batch().v, input.Feature().v, input.Y().v * input.X().v}; + DispatchData dispatchData; if (params.output.GetDims().size() == 5) { - global[0] = input.Batch().v; - global[1] = input.Feature().v * input.Z().v; - global[2] = input.Y().v * input.X().v; + dispatchData.gws = { input.Batch().v, input.Feature().v * input.Z().v, input.Y().v * input.X().v }; + } else { + dispatchData.gws = { input.Batch().v, input.Feature().v, input.Y().v * input.X().v }; } - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - return kd; + return dispatchData; } KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params, @@ -64,7 +52,7 @@ KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params, const auto& prim_params = static_cast(params); - auto run_info = SetDefault(prim_params); + auto dispatchData = SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); @@ -72,7 +60,7 @@ KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); k_data.estimatedTime = estimated_time; return {k_data}; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp index d7821a6..946b82e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp @@ -60,7 +60,7 @@ Datatype PoolingKernelBase::GetActivationType(const pooling_params& params) cons } -JitConstants PoolingKernelBase::GetJitConstants(const pooling_params& pp, PoolingKernelBase::DispatchData kd) const { +JitConstants PoolingKernelBase::GetJitConstants(const pooling_params& pp, PoolingKernelBase::DispatchData dispatchData) const { JitConstants mem_consts = MakeBaseParamsJitConstants(pp); mem_consts.AddConstants({ @@ -71,7 +71,7 @@ JitConstants PoolingKernelBase::GetJitConstants(const pooling_params& pp, Poolin MakeJitConstant(toString(pp.divMode) + "_KERNEL_DIVIDER", 1), }); - if (kd.needsBoundary) { + if (dispatchData.needsBoundary) { mem_consts.AddConstant(MakeJitConstant("CHECK_BOUNDRY", 1)); } @@ -131,48 +131,46 @@ bool PoolingKernelBase::EnableRound(const kernel_selector::pooling_params& param PoolingKernelBase::DispatchData PoolingKernelBase::SetDefault(const pooling_params& params) const { const auto& output = params.output; - DispatchData kd; - - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + DispatchData dispatchData; if (output.GetLayout() == DataLayout::bfyx || output.GetLayout() == DataLayout::b_fs_yx_fsv4 || output.GetLayout() == DataLayout::byxf || output.GetLayout() == DataLayout::bfzyx || output.GetLayout() == DataLayout::b_fs_zyx_fsv16 || output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16) { // Determine global work sizes. 
- kd.gws0 = Align(output.X().v, 32); // X - kd.gws1 = output.Y().v * output.Z().v; // Y, Z - kd.gws2 = output.Batch().v * output.Feature().v; // B, F + dispatchData.gws[0] = Align(output.X().v, 32); // X + dispatchData.gws[1] = output.Y().v * output.Z().v; // Y, Z + dispatchData.gws[2] = output.Batch().v * output.Feature().v; // B, F // Find largest positive local work size that is divider for global work size. - kd.lws0 = 32; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } else if (output.GetLayout() == DataLayout::b_fs_yx_fsv32 || output.GetLayout() == DataLayout::b_fs_zyx_fsv32) { - kd.gws0 = 32; - kd.gws1 = output.Y().v * output.X().v * output.Z().v; - kd.gws2 = output.Batch().v * CeilDiv(output.Feature().v, 32); + dispatchData.gws[0] = 32; + dispatchData.gws[1] = output.Y().v * output.X().v * output.Z().v; + dispatchData.gws[2] = output.Batch().v * CeilDiv(output.Feature().v, 32); - kd.lws0 = 32; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } else { // Determine global work sizes. 
- kd.gws0 = output.Batch().v * output.Feature().v; // B, F - kd.gws1 = output.X().v; // X - kd.gws2 = output.Y().v * output.Z().v; // Y * Z + dispatchData.gws[0] = output.Batch().v * output.Feature().v; // B, F + dispatchData.gws[1] = output.X().v; // X + dispatchData.gws[2] = output.Y().v * output.Z().v; // Y * Z - kd.lws0 = std::min(std::max(kd.gws0, static_cast(1)), static_cast(32)); - while (kd.gws0 % kd.lws0 != 0) { - --kd.lws0; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } - kd.needsBoundary = NeedsBoundaryCheck(params); + dispatchData.needsBoundary = NeedsBoundaryCheck(params); - return kd; + return dispatchData; } KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params, @@ -184,16 +182,16 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params, const pooling_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); - auto cldnn_jit = GetJitConstants(orgParams, runInfo); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX) kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.h index a9bcfda..76e6bab 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.h @@ -65,7 +65,7 @@ public: protected: bool Validate(const Params&, const optional_params&) const override; - virtual JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const pooling_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const; Datatype GetAccumulatorType(const pooling_params& p) const; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp index 157430a..16df4fa 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp @@ -63,7 +63,7 @@ size_t PoolingKernel_b_fs_yx_fsv16::GetSimdSize(const pooling_params& params) co } PoolingKernelBase::DispatchData PoolingKernel_b_fs_yx_fsv16::SetDefault(const pooling_params& params) const { - DispatchData kd = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); const auto& out = params.output; const size_t alignment = GetSimdSize(params); @@ -73,25 +73,25 @@ PoolingKernelBase::DispatchData 
PoolingKernel_b_fs_yx_fsv16::SetDefault(const po auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = CeilDiv(x, x_block_size) * y; - kd.gws1 = Align(f, alignment); - kd.gws2 = b; + dispatchData.gws[0] = CeilDiv(x, x_block_size) * y; + dispatchData.gws[1] = Align(f, alignment); + dispatchData.gws[2] = b; - kd.lws0 = 1; - kd.lws1 = alignment; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = alignment; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_2; + dispatchData.efficiency = FORCE_PRIORITY_2; - return kd; + return dispatchData; } -JitConstants PoolingKernel_b_fs_yx_fsv16::GetJitConstants(const pooling_params& params, DispatchData runInfo) const { +JitConstants PoolingKernel_b_fs_yx_fsv16::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { const size_t alignment = GetSimdSize(params); size_t x_block_size = GetBlockSize(params); auto input = params.inputs[0]; auto output = params.output; - auto jit = PoolingKernelBase::GetJitConstants(params, runInfo); + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); size_t input_line_size = params.poolStride.x * (x_block_size - 1) + params.poolSize.x; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.h index 6b35c94..06c3ea2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.h @@ -28,7 +28,7 @@ public: protected: bool Validate(const Params&, const optional_params&) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const 
override; DispatchData SetDefault(const pooling_params& params) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp index 6375f73..f38905d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp @@ -42,24 +42,19 @@ ParamsKey PoolingKerneGPU_b_fs_yx_fsv4::GetSupportedKey() const { } PoolingKernelBase::DispatchData PoolingKerneGPU_b_fs_yx_fsv4::SetDefault(const pooling_params& params) const { - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); - runInfo.gws0 = params.output.X().v; // X - runInfo.gws1 = params.output.Y().v; // Y + dispatchData.gws[0] = params.output.X().v; // X + dispatchData.gws[1] = params.output.Y().v; // Y // we got b_fs_yx_fsv4 format, we process 4 features per workitem - runInfo.gws2 = CeilDiv(params.output.Feature().v, 4) * params.output.Batch().v; + dispatchData.gws[2] = CeilDiv(params.output.Feature().v, 4) * params.output.Batch().v; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes({ runInfo.gws0, runInfo.gws1, runInfo.gws2 }, params.engineInfo); - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } -JitConstants PoolingKerneGPU_b_fs_yx_fsv4::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants 
PoolingKerneGPU_b_fs_yx_fsv4::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); const size_t in_x_pitch = 4; const size_t in_y_pitch = 4 * params.inputs[0].X().LogicalDimPadded(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h index fd12d65..f771453 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h @@ -35,6 +35,6 @@ public: } protected: - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp index 802b218..1fa2473 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp @@ -45,7 +45,7 @@ ParamsKey PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetSupportedKey() const { } PoolingKernelBase::DispatchData PoolingKernelGPU_b_fs_zyx_fsv16_imad::SetDefault(const pooling_params& params) const { - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); const auto& out = params.output; auto x = out.X().v; 
@@ -54,22 +54,17 @@ PoolingKernelBase::DispatchData PoolingKernelGPU_b_fs_zyx_fsv16_imad::SetDefault auto f = out.Feature().v; auto b = out.Batch().v; - runInfo.gws0 = x; - runInfo.gws1 = y * z; + dispatchData.gws[0] = x; + dispatchData.gws[1] = y * z; // we got b_fs_yx_fsv16 format, we process 16 features per workitem - runInfo.gws2 = CeilDiv(f, FEATURE_SLICE_SIZE) * b; + dispatchData.gws[2] = CeilDiv(f, FEATURE_SLICE_SIZE) * b; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes({ runInfo.gws0, runInfo.gws1, runInfo.gws2 }, params.engineInfo); - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } -JitConstants PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); const size_t in_x_pitch = FEATURE_SLICE_SIZE; const size_t in_y_pitch = FEATURE_SLICE_SIZE * params.inputs[0].X().LogicalDimPadded(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.h index 8870a6f..fe16878 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.h @@ -36,6 +36,6 @@ public: } protected: - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const 
pooling_params& params, DispatchData dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp index 4088e22..f3c4ea5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp @@ -42,15 +42,15 @@ ParamsKey PoolingKernelGPUBfyxBlockOpt::GetSupportedKey() const { PoolingKernelBase::DispatchData PoolingKernelGPUBfyxBlockOpt::SetDefault(const pooling_params& params) const { const auto& output = params.output; - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); - runInfo.gws1 = CeilDiv(output.Y().v, params.poolSize.y); + dispatchData.gws[1] = CeilDiv(output.Y().v, params.poolSize.y); - return runInfo; + return dispatchData; } -JitConstants PoolingKernelGPUBfyxBlockOpt::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKernelGPUBfyxBlockOpt::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); jit.AddConstant( MakeJitConstant("BLOCK_SIZE_Y", params.poolSize.y + params.poolSize.y * params.poolStride.y - 1)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h index 4b77a84..b093a1a 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h @@ -28,7 +28,7 @@ public: protected: bool Validate(const Params&, const optional_params&) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; DispatchData SetDefault(const pooling_params& params) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp index a0af34f..ef06a7e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp @@ -50,22 +50,22 @@ ParamsKey Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetSupportedKey() const { } PoolingKernelBase::DispatchData Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::SetDefault(const pooling_params& params) const { - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); - runInfo.gws0 = params.output.Feature().v/16; - runInfo.gws1 = params.output.X().v * params.output.Y().v; - runInfo.gws2 = params.output.Batch().v; + dispatchData.gws[0] = params.output.Feature().v/16; + dispatchData.gws[1] = params.output.X().v * params.output.Y().v; + dispatchData.gws[2] = params.output.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = SIMD_SIZE; - 
runInfo.efficiency = FORCE_PRIORITY_1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = SIMD_SIZE; + dispatchData.efficiency = FORCE_PRIORITY_1; - return runInfo; + return dispatchData; } -JitConstants Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = EnableRound(params) ? Datatype::INT32 : GetActivationType(params); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.h index 4651dbd..5607b79 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.h @@ -36,6 +36,6 @@ public: } protected: - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.cpp index 93ae175..445312f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.cpp @@ -50,7 +50,7 @@ ParamsKey PoolingKernel_bsv16_fsv16::GetSupportedKey() const { } PoolingKernelBase::DispatchData PoolingKernel_bsv16_fsv16::SetDefault(const pooling_params& params) const { - DispatchData kd = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); const auto& out = params.output; @@ -60,17 +60,17 @@ PoolingKernelBase::DispatchData PoolingKernel_bsv16_fsv16::SetDefault(const pool auto f = out.Feature().v; auto b = out.Batch().v; - kd.gws0 = Align(f, feature_block_size); - kd.gws1 = x * y * z; - kd.gws2 = CeilDiv(b, batch_block_size); + dispatchData.gws[0] = Align(f, feature_block_size); + dispatchData.gws[1] = x * y * z; + dispatchData.gws[2] = CeilDiv(b, batch_block_size); - kd.lws0 = sub_group_size; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = sub_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - kd.efficiency = FORCE_PRIORITY_1; + dispatchData.efficiency = FORCE_PRIORITY_1; - return kd; + return dispatchData; } bool PoolingKernel_bsv16_fsv16::Validate(const Params& p, const optional_params& o) const { @@ -98,10 +98,10 @@ bool PoolingKernel_bsv16_fsv16::Validate(const Params& p, const optional_params& return true; } -JitConstants PoolingKernel_bsv16_fsv16::GetJitConstants(const pooling_params& params, DispatchData runInfo) const { +JitConstants PoolingKernel_bsv16_fsv16::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { auto input = params.inputs[0]; auto output = params.output; - auto jit = PoolingKernelBase::GetJitConstants(params, runInfo); + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); jit.AddConstant(MakeJitConstant("OC_BLOCK", feature_block_size)); jit.AddConstant(MakeJitConstant("MB_BLOCK", batch_block_size)); diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.h index fc2ebc2..2e938b6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bsv16_fsv16.h @@ -32,7 +32,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; DispatchData SetDefault(const pooling_params& params) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.cpp index b5d9e47..8cb55bb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.cpp @@ -41,15 +41,15 @@ ParamsKey PoolingKernelGPUByxfOpt::GetSupportedKey() const { PoolingKernelBase::DispatchData PoolingKernelGPUByxfOpt::SetDefault(const pooling_params& params) const { const auto& output = params.output; - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); - runInfo.gws2 = output.Batch().v * (CeilDiv(output.Feature().v, 8)); + dispatchData.gws[2] = output.Batch().v * (CeilDiv(output.Feature().v, 8)); - return runInfo; + return dispatchData; } 
-JitConstants PoolingKernelGPUByxfOpt::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKernelGPUByxfOpt::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h index 5c65477..4bc0249 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h @@ -28,7 +28,7 @@ public: protected: bool Validate(const Params&, const optional_params&) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; DispatchData SetDefault(const pooling_params& params) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.cpp index 655f164..2df5ab4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.cpp @@ -41,15 
+41,15 @@ ParamsKey PoolingKernelGPUByxfPaddingOpt::GetSupportedKey() const { PoolingKernelBase::DispatchData PoolingKernelGPUByxfPaddingOpt::SetDefault(const pooling_params& params) const { const auto& output = params.output; - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); - runInfo.gws2 = output.Batch().v * (CeilDiv(output.Feature().v, 8)); + dispatchData.gws[2] = output.Batch().v * (CeilDiv(output.Feature().v, 8)); - return runInfo; + return dispatchData; } -JitConstants PoolingKernelGPUByxfPaddingOpt::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKernelGPUByxfPaddingOpt::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h index f7566aa..9614953 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h @@ -28,7 +28,7 @@ public: protected: bool Validate(const Params&, const optional_params&) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; DispatchData SetDefault(const pooling_params& params) const override; std::vector 
GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.cpp index 25ccfe1..62d570e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.cpp @@ -43,19 +43,19 @@ ParamsKey PoolingKerneGPU_fs_b_yx_fsv32::GetSupportedKey() const { } PoolingKernelBase::DispatchData PoolingKerneGPU_fs_b_yx_fsv32::SetDefault(const pooling_params& params) const { - DispatchData runInfo = PoolingKernelBase::SetDefault(params); + DispatchData dispatchData = PoolingKernelBase::SetDefault(params); - runInfo.gws0 = params.output.X().v; // X output blocks - runInfo.gws1 = params.output.Y().v; // Y output clocks + dispatchData.gws[0] = params.output.X().v; // X output blocks + dispatchData.gws[1] = params.output.Y().v; // Y output clocks // in fs_b_yx_fsv32 format we will process 2 features per work item, so reads/writes are done in full writes for // fp16 - runInfo.gws2 = RoundUp(params.output.Feature().v, 32) * params.output.Batch().v / 2; + dispatchData.gws[2] = RoundUp(params.output.Feature().v, 32) * params.output.Batch().v / 2; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 16; - return runInfo; + return dispatchData; } bool PoolingKerneGPU_fs_b_yx_fsv32::Validate(const Params& p, const optional_params& o) const { @@ -74,8 +74,8 @@ bool PoolingKerneGPU_fs_b_yx_fsv32::Validate(const Params& p, const optional_par return true; } -JitConstants PoolingKerneGPU_fs_b_yx_fsv32::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = 
PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKerneGPU_fs_b_yx_fsv32::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); auto pp = static_cast(params); // Heurestic needed for very big pool size. diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.h index 5bb61fa..d224be0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_b_yx_fsv32.h @@ -29,7 +29,7 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, FusedOpType::SCALE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.cpp index 9df0eba..66df152 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.cpp @@ -59,8 +59,8 @@ KernelsData PoolingKernelGPUInt8Ref::GetKernelsData(const Params& params, const return GetCommonKernelsData(params, options, FORCE_PRIORITY_9); } -JitConstants PoolingKernelGPUInt8Ref::GetJitConstants(const pooling_params& params, DispatchData 
kd) const { - JitConstants jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKernelGPUInt8Ref::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + JitConstants jit = PoolingKernelBase::GetJitConstants(params, dispatchData); jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h index 6def2a4..aeae541 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h @@ -27,7 +27,7 @@ public: KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; bool Validate(const Params&, const optional_params&) const override; - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, FusedOpType::SCALE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.cpp index 67dfa1d..8568b64 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.cpp @@ -41,8 +41,8 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const { return k; } -JitConstants 
PoolingKernelGPURef::GetJitConstants(const pooling_params& params, DispatchData kd) const { - auto jit = PoolingKernelBase::GetJitConstants(params, kd); +JitConstants PoolingKernelGPURef::GetJitConstants(const pooling_params& params, DispatchData dispatchData) const { + auto jit = PoolingKernelBase::GetJitConstants(params, dispatchData); jit.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h index e42bcc8..4afdbad 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h @@ -32,6 +32,6 @@ public: } protected: - JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const pooling_params& params, DispatchData dispatchData) const override; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp index 4f40013..05da60c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2019 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in 
compliance with the License. @@ -31,24 +31,10 @@ JitConstants PyramidROIAlignKernelBase::GetJitConstants(const PyramidROIAlign_pa } PyramidROIAlignKernelBase::DispatchData PyramidROIAlignKernelBase::SetDefault(const PyramidROIAlign_params& params) const { - DispatchData kd; - - kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - - std::vector global; - global = {1, 1, 1}; - - const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + DispatchData dispatchData; + dispatchData.gws = {1, 1, 1}; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + return dispatchData; } KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params, @@ -58,7 +44,7 @@ KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params const auto& prim_params = static_cast(params); - auto run_info = SetDefault(prim_params); + auto dispatchData = SetDefault(prim_params); KernelData k_data = KernelData::Default(params); auto cldnn_jit = GetJitConstants(prim_params); auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options); @@ -66,7 +52,7 @@ KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params auto& kernel = k_data.kernels[0]; FillCLKernelData(kernel, - run_info, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp index 5da8914..d9446c9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -42,24 +42,16 @@ ParamsKey PyramidROIAlignKernelRef::GetSupportedKey() const { } PyramidROIAlignKernelBase::DispatchData PyramidROIAlignKernelRef::SetDefault(const PyramidROIAlign_params& params) const { - auto dispatch = PyramidROIAlignKernelBase::SetDefault(params); + auto dispatchData = PyramidROIAlignKernelBase::SetDefault(params); - std::vector global = { + dispatchData.gws = { params.output.X().v * params.output.Y().v, params.output.Feature().v, params.output.Batch().v }; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - dispatch.gws0 = global[0]; - dispatch.gws1 = global[1]; - dispatch.gws2 = global[2]; - - dispatch.lws0 = local[0]; - dispatch.lws1 = local[1]; - dispatch.lws2 = local[2]; - - return dispatch; + return dispatchData; } KernelsData PyramidROIAlignKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp index 5ec6054..d52551c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp @@ -33,7 +33,7 @@ bool QuantizeKernelBase::Validate(const Params& p, const optional_params&) const return true; } -JitConstants QuantizeKernelBase::GetJitConstants(const 
quantize_params& params, const CommonDispatchData& runInfo) const { +JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const { JitConstants jit = MakeBaseParamsJitConstants(params); if (params.packed_binary_output) { @@ -55,9 +55,9 @@ JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, jit.AddConstant(MakeJitConstant("LEVELS", static_cast(params.levels))); - jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0)); - jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1)); - jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2)); + jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0])); + jit.AddConstant(MakeJitConstant("LWS_1", dispatchData.lws[1])); + jit.AddConstant(MakeJitConstant("LWS_2", dispatchData.lws[2])); return jit; } @@ -72,15 +72,15 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio return {}; } - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = GetJitConstants(newParams, runInfo); + auto cldnn_jit = GetJitConstants(newParams, dispatchData); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); kernel.arguments = GetArgsDesc(static_cast(newParams.inputs.size()), false, false); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h index 
480e786..c03ef65 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h @@ -29,7 +29,7 @@ public: KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; protected: - virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const; + virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const; virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp index 27fe85f..61443bd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
- -#include #include "quantize_kernel_ref.h" #include "kernel_selector_utils.h" #include @@ -41,35 +39,33 @@ ParamsKey QuantizeKernelRef::GetSupportedKey() const { } CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; auto output = params.output; if (output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { - runInfo.gws0 = output.Batch().v; - runInfo.gws1 = Align(output.Feature().v, sub_group_size); - runInfo.gws2 = output.Y().v * output.X().v * output.Z().v; + dispatchData.gws[0] = output.Batch().v; + dispatchData.gws[1] = Align(output.Feature().v, sub_group_size); + dispatchData.gws[2] = output.Y().v * output.X().v * output.Z().v; - runInfo.lws0 = 1; - runInfo.lws1 = sub_group_size; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; } else { - runInfo.gws0 = output.Batch().v; - runInfo.gws1 = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v; - runInfo.gws2 = Align(output.X().v * output.Y().v * output.Z().v, 16); + dispatchData.gws[0] = output.Batch().v; + dispatchData.gws[1] = params.packed_binary_output ? 
CeilDiv(output.Feature().v, 32) : output.Feature().v; + dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v, 16); - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 16; } - runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - - return runInfo; + return dispatchData; } -JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(params, runInfo); +JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h index f0263b2..5e9bfab 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h @@ -26,7 +26,7 @@ public: QuantizeKernelRef() : QuantizeKernelBase("quantize_gpu_ref") {} virtual ~QuantizeKernelRef() {} - JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override; + JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override; CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; ParamsKey GetSupportedKey() const override; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp index 8023c56..cd29dbf 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -61,38 +61,28 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const { } CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; auto output = params.output; if (output.GetLayout() == DataLayout::b_fs_yx_fsv16) { - runInfo.gws0 = output.Y().v * output.X().v; - runInfo.gws1 = Align(output.Feature().v, sub_group_size); - runInfo.gws2 = output.Batch().v; + dispatchData.gws[0] = output.Y().v * output.X().v; + dispatchData.gws[1] = Align(output.Feature().v, sub_group_size); + dispatchData.gws[2] = output.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = sub_group_size; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; } else { - auto global = GetTensorFriendlyWorkGroups(output); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; + dispatchData.gws = GetTensorFriendlyWorkGroups(output); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, 
params.engineInfo); } - runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - - return runInfo; + return dispatchData; } -JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { - JitConstants jit = Parent::GetJitConstants(params, runInfo); +JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const { + JitConstants jit = Parent::GetJitConstants(params, dispatchData); if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16) { jit.AddConstant(MakeJitConstant("GWS_BATCH", 2)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h index d88dfb3..ac07815 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h @@ -26,7 +26,7 @@ public: QuantizeKernelScaleShift() : QuantizeKernelBase("quantize_gpu_scale_shift_opt") {} virtual ~QuantizeKernelScaleShift() {} - JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override; + JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override; CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; ParamsKey GetSupportedKey() const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_b_fs_yx_fsv16.cpp index 
56cf279..5548d52 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_b_fs_yx_fsv16.cpp @@ -72,22 +72,15 @@ ParamsKey ReduceKernel_b_fs_yx_fsv16::GetSupportedKey() const { } CommonDispatchData ReduceKernel_b_fs_yx_fsv16::SetDefault(const reduce_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; auto in_dims = calc_in_dims(params); - std::vector global = {16, - CeilDiv(in_dims[3].v, calc_read_offset(params)) * in_dims[2].v, // X, Y - CeilDiv(in_dims[1].v, SIMD) * in_dims[0].v}; // F, B + dispatchData.gws = { 16, + CeilDiv(in_dims[3].v, calc_read_offset(params)) * in_dims[2].v, // X, Y + CeilDiv(in_dims[1].v, SIMD) * in_dims[0].v }; // F, B + dispatchData.lws = { SIMD, 1, 1 }; - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = SIMD; - runInfo.lws1 = 1; - runInfo.lws2 = 1; - - return runInfo; + return dispatchData; } JitConstants ReduceKernel_b_fs_yx_fsv16::GetJitConstants(const reduce_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_base.cpp index 526080e..3db770f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_base.cpp @@ -235,7 +235,7 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p, } const reduce_params& params = static_cast(p); - DispatchData runInfo = SetDefault(params, options); + DispatchData dispatchData = SetDefault(params, options); KernelData kd = KernelData::Default(params); @@ -245,7 +245,7 @@ KernelsData 
ReduceKernelBase::GetCommonKernelsData(const Params& p, auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, - runInfo, + dispatchData, params.engineInfo, kernelName, jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp index ca26a37..5cd1f6d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp @@ -43,23 +43,14 @@ ParamsKey ReduceKernelRef::GetSupportedKey() const { } CommonDispatchData ReduceKernelRef::SetDefault(const reduce_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = {params.output.X().v * params.output.Y().v, - params.output.Z().v * params.output.W().v, - params.output.Batch().v * params.output.Feature().v}; + dispatchData.gws = { params.output.X().v * params.output.Y().v, + params.output.Z().v * params.output.W().v, + params.output.Batch().v * params.output.Feature().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants ReduceKernelRef::GetJitConstants(const reduce_params& params) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp index aa66932..a253aff 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -47,35 +47,23 @@ JitConstants RegionYoloKernelRef::GetJitConstants(const region_yolo_params& ry) } RegionYoloKernelRef::DispatchData SetDefault(const region_yolo_params& params) { - RegionYoloKernelRef::DispatchData kd; - - kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16); + RegionYoloKernelRef::DispatchData dispatchData; const auto& input = params.inputs[0]; - std::vector global; if (input.GetLayout() == DataLayout::bfyx) { - global = {input.X().v * input.Y().v, 1, 1}; + dispatchData.gws = {input.X().v * input.Y().v, 1, 1}; } else { - global = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v}; + dispatchData.gws = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v}; } - // Determine global work sizes. 
- kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - return kd; + return dispatchData; } KernelsData RegionYoloKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { assert(params.GetType() == KernelType::REGION_YOLO); const region_yolo_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); auto cldnn_jit = GetJitConstants(orgParams); @@ -83,7 +71,7 @@ KernelsData RegionYoloKernelRef::GetKernelsData(const Params& params, const opti auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = FORCE_PRIORITY_9; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.cpp index e746526..2238996 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -45,21 +45,21 @@ JitConstants ReorderFromWinograd2x3Kernel::GetJitConstants(const reorder_params& ReorderFromWinograd2x3Kernel::DispatchData ReorderFromWinograd2x3Kernel::SetDefault( const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; constexpr auto output_tile_width = 2; // by definition of F(2,3) const auto& input = params.inputs[0]; const auto& output = params.output; - kd.gws0 = static_cast(output.Feature().v * output.Batch().v); - kd.gws1 = static_cast(output.X().v / output_tile_width); - kd.gws2 = static_cast(output.Y().v); + dispatchData.gws[0] = static_cast(output.Feature().v * output.Batch().v); + dispatchData.gws[1] = static_cast(output.X().v / output_tile_width); + dispatchData.gws[2] = static_cast(output.Y().v); - kd.lws0 = input.Feature().v > 32 ? 32 : static_cast(input.Feature().v); - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = input.Feature().v > 32 ? 32 : static_cast(input.Feature().v); + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } KernelsData ReorderFromWinograd2x3Kernel::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp index 43491a2..ded7b6a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -151,26 +151,16 @@ JitConstants ReorderKernelBase::GetJitConstants(const reorder_params& params) co ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_weights_params& params) const { const auto& out = params.output; - DispatchData kd; + DispatchData dispatchData; - std::vector global(3); + dispatchData.gws = { out.G().v * out.OFM().v, out.IFM().v, out.X().v * out.Y().v * out.Z().v }; + dispatchData.lws= GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - global = {out.G().v * out.OFM().v, out.IFM().v, out.X().v * out.Y().v * out.Z().v}; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; auto& input = params.inputs[0]; DataTensor input_tensor = input; @@ -183,36 +173,28 @@ ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_para input_tensor = DataTensor(input_sizes, input.GetDType(), DataLayout::image_2d_rgba); } - auto global = GetTensorFriendlyWorkGroups(input_tensor); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.gws = GetTensorFriendlyWorkGroups(input_tensor); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) { std::vector sizes = { 32, 16, 8, 4 }; for (auto& s : sizes) { - if (kd.gws2 % s == 0) { - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = s; + if (dispatchData.gws[2] % s == 0) { + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = s; break; } } } 
if (params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 && params.inputs[0].Feature().v % 16 == 0) { - kd.lws0 = 1; - kd.lws1 = 16; - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 16; + dispatchData.lws[2] = 1; } - return kd; + return dispatchData; } KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params& params, const optional_params& options, float estimated_time) const { @@ -223,9 +205,9 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params KernelData kd = KernelData::Default(params); reorder_weights_params& newParams = *static_cast(kd.params.get()); - DispatchData runInfo; + DispatchData dispatchData; - runInfo = SetDefault(newParams); + dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); @@ -233,7 +215,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kernel.arguments = GetArgsDesc(1, false, false); @@ -251,9 +233,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params KernelData kd = KernelData::Default(params); reorder_params& newParams = *static_cast(kd.params.get()); - DispatchData runInfo; - - runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); @@ -261,7 +241,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); 
kernel.arguments = GetArgsDesc(1, false, false); if (newParams.mode == MeanSubtractMode::IN_BUFFER) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_binary.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_binary.cpp index c4a651f..10d4c1e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_binary.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_binary.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -60,22 +60,14 @@ JitConstants ReorderKernelBinary::GetJitConstants(const reorder_params& params) } ReorderKernelBinary::DispatchData ReorderKernelBinary::SetDefault(const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.inputs[0]; - std::vector global{input.Batch().v, CeilDiv(input.Feature().v, 32), input.Y().v * input.X().v}; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { input.Batch().v, CeilDiv(input.Feature().v, 32), input.Y().v * input.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData ReorderKernelBinary::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.cpp index 
9bdc21c..f71a2ab 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -92,21 +92,21 @@ JitConstants ReorderKernelFastBatch1::GetJitConstants(const reorder_params& para } ReorderKernelFastBatch1::DispatchData ReorderKernelFastBatch1::SetDefault(const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& output = params.output; unsigned int gws = (unsigned int)output.LogicalSize(); - kd.gws0 = Align(gws, 32); - kd.gws1 = 1; - kd.gws2 = 1; + dispatchData.gws[0] = Align(gws, 32); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - kd.lws0 = 32; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } KernelsData ReorderKernelFastBatch1::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fs_b_yx_fsv32_to_bfyx.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fs_b_yx_fsv32_to_bfyx.cpp index 2db6641..83c80b2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fs_b_yx_fsv32_to_bfyx.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fs_b_yx_fsv32_to_bfyx.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use 
this file except in compliance with the License. @@ -68,19 +68,19 @@ JitConstants ReorderKernel_fs_b_yx_fsv32_to_bfyx::GetJitConstants(const reorder_ } ReorderKernelBase::DispatchData ReorderKernel_fs_b_yx_fsv32_to_bfyx::SetDefault(const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; auto x_aligned = Align(params.output.X().v, x_block_align); - kd.gws0 = params.output.Batch().v; - kd.gws1 = Align(params.output.Feature().v, fsv); - kd.gws2 = params.output.Y().v * x_aligned / GetOptimalSize(x_aligned, optimal_x_sizes); + dispatchData.gws[0] = params.output.Batch().v; + dispatchData.gws[1] = Align(params.output.Feature().v, fsv); + dispatchData.gws[2] = params.output.Y().v * x_aligned / GetOptimalSize(x_aligned, optimal_x_sizes); - kd.lws0 = 1; - kd.lws1 = GetOptimalSize(kd.gws1, optimal_feature_sizes); - kd.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = GetOptimalSize(dispatchData.gws[1], optimal_feature_sizes); + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } KernelsData ReorderKernel_fs_b_yx_fsv32_to_bfyx::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.cpp index 8e2a284..0874f57 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -67,21 +67,21 @@ JitConstants ReorderKernel_to_yxfb_batched::GetJitConstants(const reorder_params } ReorderKernelBase::DispatchData ReorderKernel_to_yxfb_batched::SetDefault(const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.inputs[0]; unsigned int gws = (unsigned int)input.LogicalSize(); - kd.gws0 = Align(gws, 8 * input.Batch().v) / input.Batch().v; - kd.gws1 = 1; - kd.gws2 = 1; + dispatchData.gws[0] = Align(gws, 8 * input.Batch().v) / input.Batch().v; + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - kd.lws0 = 8; - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = 8; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } KernelsData ReorderKernel_to_yxfb_batched::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.cpp index 26f3dae..906bd56 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -43,20 +43,20 @@ JitConstants ReorderToWinograd2x3Kernel::GetJitConstants(const reorder_params& p } ReorderToWinograd2x3Kernel::DispatchData ReorderToWinograd2x3Kernel::SetDefault(const reorder_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.inputs[0]; const auto& output = params.output; - kd.gws0 = static_cast(input.Feature().v * input.Batch().v); - kd.gws1 = static_cast(params.winograd_nr_tiles_x); - kd.gws2 = static_cast(output.Y().v); + dispatchData.gws[0] = static_cast(input.Feature().v * input.Batch().v); + dispatchData.gws[1] = static_cast(params.winograd_nr_tiles_x); + dispatchData.gws[2] = static_cast(output.Y().v); - kd.lws0 = input.Feature().v > 32 ? 32 : static_cast(input.Feature().v); - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[0] = input.Feature().v > 32 ? 32 : static_cast(input.Feature().v); + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } KernelsData ReorderToWinograd2x3Kernel::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_binary_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_binary_kernel.cpp index 3e86a5f..8012dc9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_binary_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_binary_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -34,20 +34,12 @@ ReorderWeightsBinaryKernel::DispatchData ReorderWeightsBinaryKernel::SetDefault( const reorder_weights_params& params) const { const auto& out = params.output; - DispatchData kd; + DispatchData dispatchData; - std::vector global = {out.OFM().v, CeilDiv(out.IFM().v, 32), out.X().v * out.Y().v}; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { out.OFM().v, CeilDiv(out.IFM().v, 32), out.X().v * out.Y().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData ReorderWeightsBinaryKernel::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.cpp index 28b6b4e..24a2194 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -33,26 +33,15 @@ ParamsKey ReorderWeightsImage_fyx_b_Kernel::GetSupportedKey() const { return k; } -ReorderWeightsImage_fyx_b_Kernel::DispatchData ReorderWeightsImage_fyx_b_Kernel::SetDefault( - const reorder_weights_params& params) const { +ReorderWeightsImage_fyx_b_Kernel::DispatchData ReorderWeightsImage_fyx_b_Kernel::SetDefault(const reorder_weights_params& params) const { const auto& out = params.output; - DispatchData kd; + DispatchData dispatchData; - std::vector global(3); + dispatchData.gws = { out.OFM().v, Align(out.X().v * out.Y().v * out.IFM().v, 4) / 4, 1 }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - global = {out.OFM().v, Align(out.X().v * out.Y().v * out.IFM().v, 4) / 4, 1}; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData ReorderWeightsImage_fyx_b_Kernel::GetKernelsData(const Params& params, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.cpp index 467fa07..d9f8ba5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -35,19 +35,19 @@ ParamsKey ReorderWeightsImageWinograd6x3Kernel::GetSupportedKey() const { ReorderWeightsImageWinograd6x3Kernel::DispatchData ReorderWeightsImageWinograd6x3Kernel::SetDefault( const reorder_weights_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.input; - kd.gws0 = 1; - kd.gws1 = 3; - kd.gws2 = static_cast(input.IFM().v * input.OFM().v); + dispatchData.gws[0] = 1; + dispatchData.gws[1] = 3; + dispatchData.gws[2] = static_cast(input.IFM().v * input.OFM().v); - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 32; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 32; - return kd; + return dispatchData; } KernelsData ReorderWeightsImageWinograd6x3Kernel::GetKernelsData(const Params& params, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_opt.cpp index 09b0d77..32536f9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_opt.cpp @@ -107,7 +107,7 @@ static inline size_t GetOptimalSize(size_t val, std::vector optimal_size ReorderWeightsOpt::DispatchData ReorderWeightsOpt::SetDefault( const reorder_weights_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& output = params.output; const auto output_layout = output.GetLayout(); @@ -123,22 +123,19 @@ ReorderWeightsOpt::DispatchData ReorderWeightsOpt::SetDefault( const auto ifm_block = (osv_first) ? ifm_block_supported ? 
GetOptimalSize(output.IFM().v, preferred_sizes) : 1 : subgroup_size; - std::vector global; if (osv_first) { - global = {output.G().v * (output.IFM().v / ifm_block), output.Z().v * output.Y().v * output.X().v, Align(output.OFM().v, ofm_block)}; + dispatchData.gws = { output.G().v * (output.IFM().v / ifm_block), + output.Z().v * output.Y().v * output.X().v, + Align(output.OFM().v, ofm_block) }; } else { - global = {output.G().v * (output.OFM().v / ofm_block), output.Z().v * output.Y().v * output.X().v, Align(output.IFM().v, ifm_block)}; + dispatchData.gws = { output.G().v * (output.OFM().v / ofm_block), + output.Z().v * output.Y().v * output.X().v, + Align(output.IFM().v, ifm_block) }; } - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; + dispatchData.lws = { 1, 1, 16 }; - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 16; - - return kd; + return dispatchData; } JitConstants ReorderWeightsOpt::GetJitConstants(const reorder_weights_params& params) const { @@ -174,7 +171,7 @@ bool ReorderWeightsOpt::Validate(const Params& params, const optional_params& /* const auto& p = static_cast(params); const auto& input = p.input; const auto& output = p.output; - + if (input.GroupedLayout() != output.GroupedLayout()) { return false; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.cpp index 84ad96b..cca683f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in 
compliance with the License. @@ -35,19 +35,19 @@ ParamsKey ReorderWeightsWinograd2x3Kernel::GetSupportedKey() const { ReorderWeightsWinograd2x3Kernel::DispatchData ReorderWeightsWinograd2x3Kernel::SetDefault( const reorder_weights_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.input; - kd.gws0 = 1; - kd.gws1 = 3; - kd.gws2 = static_cast(input.IFM().v * input.OFM().v); + dispatchData.gws[0] = 1; + dispatchData.gws[1] = 3; + dispatchData.gws[2] = static_cast(input.IFM().v * input.OFM().v); - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 32; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 32; - return kd; + return dispatchData; } KernelsData ReorderWeightsWinograd2x3Kernel::GetKernelsData(const Params& params, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.cpp index 2ac9dd0..b9355d6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -34,19 +34,19 @@ ParamsKey ReorderWeightsWinograd6x3Kernel::GetSupportedKey() const { ReorderWeightsWinograd6x3Kernel::DispatchData ReorderWeightsWinograd6x3Kernel::SetDefault( const reorder_weights_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& input = params.input; - kd.gws0 = 1; - kd.gws1 = 3; - kd.gws2 = static_cast(input.IFM().v * input.OFM().v); + dispatchData.gws[0] = 1; + dispatchData.gws[1] = 3; + dispatchData.gws[2] = static_cast(input.IFM().v * input.OFM().v); - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = 32; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 32; - return kd; + return dispatchData; } KernelsData ReorderWeightsWinograd6x3Kernel::GetKernelsData(const Params& params, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp index 3e06aec..8f55732 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -44,35 +44,23 @@ JitConstants ReorgYoloKernelRef::GetJitConstants(const reorg_yolo_params& ry) co return jit; } ReorgYoloKernelRef::DispatchData SetDefault(const reorg_yolo_params& params) { - ReorgYoloKernelRef::DispatchData kd; - - kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16); + ReorgYoloKernelRef::DispatchData dispatchData; const auto& input = params.inputs[0]; - std::vector global; if (input.GetLayout() == DataLayout::bfyx) { - global = {input.X().v, input.Y().v, input.Feature().v}; + dispatchData.gws = {input.X().v, input.Y().v, input.Feature().v}; } else { - global = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v}; + dispatchData.gws = {input.Feature().v * input.Batch().v, input.X().v, input.Y().v}; } - // Determine global work sizes. - kd.gws0 = global[0]; - kd.gws1 = global[1]; - kd.gws2 = global[2]; - - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - return kd; + return dispatchData; } KernelsData ReorgYoloKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { assert(params.GetType() == KernelType::REORG_YOLO); const reorg_yolo_params& orgParams = static_cast(params); - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); auto cldnn_jit = GetJitConstants(orgParams); @@ -80,7 +68,7 @@ KernelsData ReorgYoloKernelRef::GetKernelsData(const Params& params, const optio auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = FORCE_PRIORITY_9; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp index 6f933f4..1ff3913 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp @@ -58,40 +58,29 @@ size_t ResampleKernelBase::GetFeatureBlockSize(const resample_params& params) co } ResampleKernelBase::DispatchData ResampleKernelBase::SetDefault(const kernel_selector::resample_params &arg) const { - DispatchData runInfo; - std::vector global; - std::vector local; + DispatchData dispatchData; const auto& out = arg.output; if (arg.resampleType == ResampleType::NEAREST_NEIGHBOR) - global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; + dispatchData.gws = { out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v }; else if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) - global = {Align(out.X().v, 32), out.Y().v, out.Batch().v}; + dispatchData.gws = { Align(out.X().v, 32), out.Y().v, out.Batch().v }; else if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP) - global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v}; + dispatchData.gws = { out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v }; else - global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; + dispatchData.gws = { out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v }; - local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo); if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == 
ResampleType::LINEAR_ONNX) { - local[0] = 32; - local[1] = 1; - local[2] = 1; + dispatchData.lws[0] = 32; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; + dispatchData.efficiency = FORCE_PRIORITY_7; - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - runInfo.efficiency = FORCE_PRIORITY_7; - runInfo.fp16UnitUsed = out.GetDType() == Datatype::F16; - - return runInfo; + return dispatchData; } bool ResampleKernelBase::Validate(const Params& p, const optional_params& o) const { @@ -227,16 +216,16 @@ KernelsData ResampleKernelBase::GetCommonKernelsData(const Params& params, const KernelData kd = KernelData::Default(params); resample_params& newParams = *static_cast(kd.params.get()); - auto runInfo = SetDefault(newParams); + auto dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = dispatchData.efficiency; return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_opt.cpp index 9a74a61..da201ed 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_opt.cpp @@ -52,21 +52,20 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const { } 
ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_selector::resample_params &arg) const { - DispatchData runInfo; + DispatchData dispatchData; const auto& out = arg.output; - runInfo.gws0 = CeilDiv(out.X().v, GetOptimalBlockSize(arg)) * out.Y().v; - runInfo.gws1 = Align(out.Feature().v, sub_group_size); - runInfo.gws2 = arg.output.Batch().v; + dispatchData.gws[0] = CeilDiv(out.X().v, GetOptimalBlockSize(arg)) * out.Y().v; + dispatchData.gws[1] = Align(out.Feature().v, sub_group_size); + dispatchData.gws[2] = arg.output.Batch().v; - runInfo.lws0 = 1; - runInfo.lws1 = sub_group_size; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = sub_group_size; + dispatchData.lws[2] = 1; - runInfo.efficiency = FORCE_PRIORITY_3; - runInfo.fp16UnitUsed = out.GetDType() == Datatype::F16; + dispatchData.efficiency = FORCE_PRIORITY_3; - return runInfo; + return dispatchData; } bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp index 90069a7..eb66fba 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -123,25 +123,14 @@ JitConstants ResampleKernelRef::GetJitConstants(const resample_params& params) c } ResampleKernelBase::DispatchData ResampleKernelRef::SetDefault(const resample_params& arg) const { - auto dispatch = Parent::SetDefault(arg); + auto dispatchData = Parent::SetDefault(arg); if (use_packing(arg)) { auto pack = packing_factor(arg); - std::vector global; - std::vector local; - - global = { arg.output.X().v, arg.output.Y().v * arg.output.Z().v, CeilDiv(arg.output.Feature().v, pack) * arg.output.Batch().v }; - local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo); - - dispatch.gws0 = global[0]; - dispatch.gws1 = global[1]; - dispatch.gws2 = global[2]; - - dispatch.lws0 = local[0]; - dispatch.lws1 = local[1]; - dispatch.lws2 = local[2]; + dispatchData.gws = { arg.output.X().v, arg.output.Y().v * arg.output.Z().v, CeilDiv(arg.output.Feature().v, pack) * arg.output.Batch().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo); } - return dispatch; + return dispatchData; } } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp index f3926a7..bcd95a8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -41,23 +41,15 @@ ParamsKey ReverseSequenceKernelRef::GetSupportedKey() const { CommonDispatchData ReverseSequenceKernelRef::SetDefault(const reverse_sequence_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = {params.output.Batch().v, - params.output.Feature().v, - params.output.Y().v * params.output.X().v}; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Y().v * params.output.X().v }; - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants ReverseSequenceKernelRef::GetJitConstants(const reverse_sequence_params& params) const { @@ -75,14 +67,14 @@ KernelsData ReverseSequenceKernelRef::GetKernelsData(const Params& params, const assert(params.GetType() == KernelType::REVERSE_SEQUENCE); - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp index 
b084ac4..1dbba05 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,24 +18,22 @@ namespace kernel_selector { static ROIPoolingKernelBase::DispatchData SetDefault(const roi_pooling_params& params) { - ROIPoolingKernelBase::DispatchData kd; - - kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16); + ROIPoolingKernelBase::DispatchData dispatchData; // Determine global work sizes. - kd.gws0 = params.output.LogicalSize(); - kd.gws1 = 1; - kd.gws2 = 1; + dispatchData.gws[0] = params.output.LogicalSize(); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; // Find largest positive local work size that is divider for global work size. 
- kd.lws0 = std::min(std::max(kd.gws0, static_cast(1)), static_cast(32)); - while (kd.gws0 % kd.lws0 != 0) { - --kd.lws0; + dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); + while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { + --dispatchData.lws[0]; } - kd.lws1 = 1; - kd.lws2 = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - return kd; + return dispatchData; } JitConstants ROIPoolingKernelBase::GetJitConstants(const roi_pooling_params& rp) const { @@ -59,7 +57,7 @@ KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params, return {}; } - DispatchData runInfo = SetDefault(orgParams); + DispatchData dispatchData = SetDefault(orgParams); KernelData kd = KernelData::Default(params); auto cldnn_jit = GetJitConstants(orgParams); @@ -67,7 +65,7 @@ KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); if (orgParams.mode == PoolType::DEFORMABLE_BILINEAR && !orgParams.no_trans) kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp index 352db1e..af73c0c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp @@ -87,7 +87,7 @@ static inline std::string GetOrderString(std::vector& order) { std::string 
order_str = order[0]; for (size_t i = 1; i < order.size(); i++) order_str += ", " + order[i]; - + return order_str; } @@ -114,7 +114,7 @@ static std::string GetUpdatesIndexOrder(const scatter_update_params& params, siz std::string FYX_indices_size = "(INPUT1_FEATURE_NUM * INPUT1_SIZE_Y * INPUT1_SIZE_X)"; std::string YX_indices_size = "(INPUT1_SIZE_Y * INPUT1_SIZE_X)"; std::string X_indices_size = "(INPUT1_SIZE_X)"; - + // Shift indices of ScatterUpdate updates input related to Indices dims for (size_t i = default_order.size() - 1; i > (axis + indices_non_empty_dims - 1); i--) default_order[i] = default_order[i - indices_non_empty_dims + 1]; @@ -141,76 +141,65 @@ static std::string GetUpdatesIndexOrder(const scatter_update_params& params, siz } CommonDispatchData ScatterUpdateKernelRef::SetDefault(const scatter_update_params& params, const optional_params&, bool is_second) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; const auto& output = params.output; - std::vector global(3); const size_t indices_size = params.inputs[1].LogicalSize(); switch (params.inputs[0].GetLayout()) { case DataLayout::bfyx: - global = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v}; + dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v}; if (is_second) { if (params.axis == ScatterUpdateAxis::BATCH) - global[2] = indices_size * output.Feature().v; + dispatchData.gws[2] = indices_size * output.Feature().v; else if (params.axis == ScatterUpdateAxis::FEATURE) - global[2] = indices_size * output.Batch().v; + dispatchData.gws[2] = indices_size * output.Batch().v; else if (params.axis == ScatterUpdateAxis::Y) - global[1] = indices_size; + dispatchData.gws[1] = indices_size; else - global[0] = indices_size; + dispatchData.gws[0] = indices_size; } break; case DataLayout::bfzyx: - global = {output.X().v * output.Y().v, output.Z().v, output.Feature().v * output.Batch().v}; + dispatchData.gws = {output.X().v * output.Y().v, 
output.Z().v, output.Feature().v * output.Batch().v}; if (is_second) { if (params.axis == ScatterUpdateAxis::BATCH) - global[2] = indices_size * output.Feature().v; + dispatchData.gws[2] = indices_size * output.Feature().v; else if (params.axis == ScatterUpdateAxis::FEATURE) - global[2] = indices_size * output.Batch().v; + dispatchData.gws[2] = indices_size * output.Batch().v; else if (params.axis == ScatterUpdateAxis::Z) - global[1] = indices_size; + dispatchData.gws[1] = indices_size; else if (params.axis == ScatterUpdateAxis::Y) - global[0] = indices_size * output.X().v; + dispatchData.gws[0] = indices_size * output.X().v; else - global[0] = indices_size * output.Y().v; + dispatchData.gws[0] = indices_size * output.Y().v; } break; case DataLayout::bfwzyx: - global = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; if (is_second) { if (params.axis == ScatterUpdateAxis::BATCH) - global[2] = indices_size * output.Feature().v; + dispatchData.gws[2] = indices_size * output.Feature().v; else if (params.axis == ScatterUpdateAxis::FEATURE) - global[2] = indices_size * output.Batch().v; + dispatchData.gws[2] = indices_size * output.Batch().v; else if (params.axis == ScatterUpdateAxis::Z) - global[1] = indices_size * output.W().v; + dispatchData.gws[1] = indices_size * output.W().v; else if (params.axis == ScatterUpdateAxis::W) - global[1] = indices_size * output.Z().v; + dispatchData.gws[1] = indices_size * output.Z().v; else if (params.axis == ScatterUpdateAxis::Y) - global[0] = indices_size * output.X().v; + dispatchData.gws[0] = indices_size * output.X().v; else - global[0] = indices_size * output.Y().v; + dispatchData.gws[0] = indices_size * output.Y().v; } break; default: break; } - - std::vector local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - 
runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - return runInfo; + return dispatchData; } static std::string GetOutputIndexOnAxis(const scatter_update_params& params, size_t axis) { @@ -270,7 +259,7 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o const scatter_update_params& orgParams = static_cast(params); const size_t indices_size = orgParams.inputs[1].LogicalSize(); int start_with_iteration = 0; - + // if dim of output along axis is equal to logical size of indices, we miss copying kernel if (orgParams.inputs[0].Extract(orgParams.inputs[0].GetLayout(), Tensor::DataChannelName(orgParams.axis), orgParams.inputs[0].GetDims()).v == indices_size) { start_with_iteration = 1; @@ -281,7 +270,7 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o auto cldnn_jit = GetJitConstants(newParams); for (int i = start_with_iteration; i < 2; i++) { - auto runInfo = SetDefault(newParams, options, (i == 1)); + auto dispatchData = SetDefault(newParams, options, (i == 1)); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); if (i == 1){ @@ -291,11 +280,11 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o clKernelData& kernel = kd.kernels[i - start_with_iteration]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3, GetFusedPrimitiveInputsCount(params)); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3, GetFusedPrimitiveInputsCount(params)); } kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; - + return {kd}; } } // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp index 8463a0e..7807c85 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -98,7 +98,7 @@ JitConstants SelectKernelBase::GetJitConstants(const select_params& params) cons } SelectKernelBase::DispatchData SelectKernelBase::SetDefault(const select_params& params) const { - DispatchData kd; + DispatchData dispatchData; const auto& out = params.output; @@ -111,16 +111,12 @@ SelectKernelBase::DispatchData SelectKernelBase::SetDefault(const select_params& gws.push_back(1U); } - kd.gws0 = gws[0]; - kd.gws1 = gws[1]; - kd.gws2 = gws[2] * gws[3]; + dispatchData.gws[0] = gws[0]; + dispatchData.gws[1] = gws[1]; + dispatchData.gws[2] = gws[2] * gws[3]; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes({kd.gws0, kd.gws1, kd.gws2}, params.engineInfo); - kd.lws0 = local[0]; - kd.lws1 = local[1]; - kd.lws2 = local[2]; - - return kd; + return dispatchData; } KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const { @@ -135,12 +131,12 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); - DispatchData runInfo = SetDefault(newParams); + DispatchData dispatchData = SetDefault(newParams); auto& kernel = 
kd.kernels[0]; - kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp index a128498..68ce924 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp @@ -55,23 +55,14 @@ bool ShuffleChannelsKernelRef::Validate(const Params& p, const optional_params& CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = {params.output.Batch().v, - params.output.Feature().v, - params.output.Y().v * params.output.X().v}; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Y().v * params.output.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_params& params) const { @@ -109,14 
+100,14 @@ KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const assert(params.GetType() == KernelType::SHUFFLE_CHANNELS); - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.cpp index 25ca988..5d9547a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.cpp @@ -65,8 +65,8 @@ std::vector SoftmaxItemsClassKernelBase::GetSoftmaxDimGlobalSizes(Softma } } -JitConstants SoftmaxItemsClassKernelBase::GetJitConstants(const softmax_params& params, DispatchData kd) const { - auto jit = SoftmaxKernelBase::GetJitConstants(params, kd); +JitConstants SoftmaxItemsClassKernelBase::GetJitConstants(const softmax_params& params, DispatchData dispatchData) const { + auto jit = SoftmaxKernelBase::GetJitConstants(params, dispatchData); switch (params.dim) { case SoftmaxDim::X: diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.h index 2d40103..987778c 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.h @@ -24,7 +24,7 @@ public: virtual ~SoftmaxItemsClassKernelBase() {} protected: - JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const softmax_params& params, DispatchData dispatchData) const override; static ParamsKey GetDefaultSupportedKey(); static std::vector GetSoftmaxDimGlobalSizes(SoftmaxDim dim, const DataTensor& output); }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp index 065520e..77b3535 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp @@ -16,43 +16,42 @@ namespace kernel_selector { JitConstants SoftmaxKernelBase::GetJitConstants(const softmax_params& params, - SoftmaxKernelBase::DispatchData kd) const { + SoftmaxKernelBase::DispatchData dispatchData) const { JitConstants mem_consts = MakeBaseParamsJitConstants(params); mem_consts.AddConstants({MakeJitConstant("ALONG_" + toString(params.dim), "")}); mem_consts.AddConstants({ - MakeJitConstant("ITEMS_NUM", kd.itemsNum), - MakeJitConstant("LWS", kd.lws0), - MakeJitConstant("GWS", kd.gws0), - MakeJitConstant("DATA_SETS_COUNT", kd.dataSetsCount), - MakeJitConstant("DATA_SET_SIZE", kd.dataSetSize), - MakeJitConstant("LEFTOVERS", kd.leftovers), + MakeJitConstant("ITEMS_NUM", dispatchData.itemsNum), + MakeJitConstant("LWS", dispatchData.lws[0]), + MakeJitConstant("GWS", dispatchData.gws[0]), + MakeJitConstant("DATA_SETS_COUNT", dispatchData.dataSetsCount), + 
MakeJitConstant("DATA_SET_SIZE", dispatchData.dataSetSize), + MakeJitConstant("LEFTOVERS", dispatchData.leftovers), }); return mem_consts; } -SoftmaxKernelBase::DispatchData SoftmaxKernelBase::SetDefault(const softmax_params& params, +SoftmaxKernelBase::DispatchData SoftmaxKernelBase::SetDefault(const softmax_params&, const optional_params&) const { - DispatchData runInfo; + DispatchData dispatchData; - runInfo.gws0 = 1; - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = 1; + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; - runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - runInfo.leftovers = 0; - runInfo.itemsNum = 0; - runInfo.normIndex = 0; - runInfo.dataSetsCount = 0; - runInfo.dataSetSize = 0; + dispatchData.leftovers = 0; + dispatchData.itemsNum = 0; + dispatchData.normIndex = 0; + dispatchData.dataSetsCount = 0; + dispatchData.dataSetSize = 0; - return runInfo; + return dispatchData; } bool SoftmaxKernelBase::Validate(const Params& p, const optional_params& o) const { @@ -71,15 +70,15 @@ KernelsData SoftmaxKernelBase::GetCommonKernelsData(const Params& params, const const softmax_params& orgParams = static_cast(params); KernelData kd = KernelData::Default(params); - auto runInfo = SetDefault(orgParams, options); - auto cldnn_jit = GetJitConstants(orgParams, runInfo); + auto dispatchData = SetDefault(orgParams, options); + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); - kd.estimatedTime = runInfo.efficiency; + kd.estimatedTime = 
dispatchData.efficiency; return {kd}; } @@ -118,12 +117,12 @@ SoftmaxKernelBase::DispatchData SoftmaxKernelBaseBF::SetDefault(const softmax_pa const optional_params& options) const { const auto& input = params.inputs[0]; - DispatchData kd = Parent::SetDefault(params, options); + DispatchData dispatchData = Parent::SetDefault(params, options); auto flatten_input = input.FlattenFeatureAndSpatials(); - kd.dataSetSize = flatten_input.Feature().v; - kd.dataSetsCount = input.Batch().v; + dispatchData.dataSetSize = flatten_input.Feature().v; + dispatchData.dataSetsCount = input.Batch().v; - return kd; + return dispatchData; } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.h index d01e91d..937d0eb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.h @@ -59,7 +59,7 @@ public: protected: virtual bool Validate(const Params&, const optional_params&) const; - virtual JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const; + virtual JitConstants GetJitConstants(const softmax_params& params, DispatchData dispatchData) const; virtual DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params& optParams) const; }; @@ -74,4 +74,4 @@ protected: bool Validate(const Params&, const optional_params&) const override; DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.cpp index 8a33e17..ae5d29f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.cpp @@ -35,37 +35,37 @@ ParamsKey SoftmaxKernel_bf::GetSupportedKey() const { SoftmaxKernel_bf::Parent::DispatchData SoftmaxKernel_bf::SetDefault(const softmax_params& params, const optional_params& optParams) const { - auto kd = Parent::SetDefault(params, optParams); + auto dispatchData = Parent::SetDefault(params, optParams); // start with 1 thread per data set - kd.gws0 = 1; - kd.gws1 = kd.dataSetsCount; - kd.itemsNum = kd.dataSetSize; + dispatchData.gws[0] = 1; + dispatchData.gws[1] = dispatchData.dataSetsCount; + dispatchData.itemsNum = dispatchData.dataSetSize; - kd.normIndex = 0; + dispatchData.normIndex = 0; // We have two units of data per work item in current implementation. - auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float)); + auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType()); // Combining device execution and local memory restrictions to compute maximum possible LWS. auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi); - kd.lws0 = 1; + dispatchData.lws[0] = 1; // Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory // reads. 
- while ((kd.itemsNum > 32 || kd.lws0 < kd.itemsNum) && (2 * kd.lws0 <= max_lws)) { - kd.lws0 *= 2; - kd.itemsNum /= 2; + while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) { + dispatchData.lws[0] *= 2; + dispatchData.itemsNum /= 2; } - assert((kd.itemsNum + 1) * kd.lws0 >= kd.dataSetSize && "More than 'lws0' items per batch remains! Lws too small?"); + assert((dispatchData.itemsNum + 1) * dispatchData.lws[0] >= dispatchData.dataSetSize && "More than 'lws[0]' items per batch remains! Lws too small?"); - kd.gws0 = kd.lws0; - kd.leftovers = kd.dataSetSize % kd.lws0; + dispatchData.gws[0] = dispatchData.lws[0]; + dispatchData.leftovers = dispatchData.dataSetSize % dispatchData.lws[0]; - assert(kd.itemsNum > 0 && kd.lws0 && kd.gws0 > 0); + assert(dispatchData.itemsNum > 0 && dispatchData.lws[0] && dispatchData.gws[0] > 0); - kd.efficiency = FORCE_PRIORITY_6; - return kd; + dispatchData.efficiency = FORCE_PRIORITY_6; + return dispatchData; } KernelsData SoftmaxKernel_bf::GetKernelsData(const Params& params, const optional_params& optionalParams) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.cpp index 27d0a1d..f31a03d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.cpp @@ -35,36 +35,36 @@ ParamsKey SoftmaxKernel_fb::GetSupportedKey() const { SoftmaxKernel_fb::Parent::DispatchData SoftmaxKernel_fb::SetDefault(const softmax_params& params, const optional_params& optParams) const { - auto kd = Parent::SetDefault(params, optParams); + auto dispatchData = Parent::SetDefault(params, optParams); // start with 1 thread per data set - kd.gws0 = kd.dataSetsCount; - kd.gws1 = 
1; - kd.itemsNum = kd.dataSetSize; + dispatchData.gws[0] = dispatchData.dataSetsCount; + dispatchData.gws[1] = 1; + dispatchData.itemsNum = dispatchData.dataSetSize; - kd.normIndex = 1; + dispatchData.normIndex = 1; // We have two units of data per work item in current implementation. - auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float)); + auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType()); // Combining device execution and local memory restrictions to compute maximum possible LWS. auto max_lws = static_cast( std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi)); - kd.lws0 = std::min(kd.dataSetsCount, max_lws); + dispatchData.lws[0] = std::min(dispatchData.dataSetsCount, max_lws); // Compute maximum possible LWS that does not exceed device capabilities and optimizes number of global memory // reads. - while ((kd.itemsNum > 32 || kd.lws0 < kd.itemsNum) && (2 * kd.lws0 <= max_lws)) { - kd.lws0 *= 2; - kd.itemsNum /= 2; + while ((dispatchData.itemsNum > 32 || dispatchData.lws[0] < dispatchData.itemsNum) && (2 * dispatchData.lws[0] <= max_lws)) { + dispatchData.lws[0] *= 2; + dispatchData.itemsNum /= 2; } - kd.gws0 = kd.lws0; - kd.gws1 = 1; - kd.leftovers = (kd.dataSetSize * kd.dataSetsCount) % kd.lws0; + dispatchData.gws[0] = dispatchData.lws[0]; + dispatchData.gws[1] = 1; + dispatchData.leftovers = (dispatchData.dataSetSize * dispatchData.dataSetsCount) % dispatchData.lws[0]; - assert(kd.itemsNum > 0 && kd.lws0 && kd.gws0 > 0); + assert(dispatchData.itemsNum > 0 && dispatchData.lws[0] && dispatchData.gws[0] > 0); - kd.efficiency = FORCE_PRIORITY_6; - return kd; + dispatchData.efficiency = FORCE_PRIORITY_6; + return dispatchData; } bool kernel_selector::SoftmaxKernel_fb::Validate(const Params& params, const optional_params& o) const { @@ -74,8 +74,7 @@ bool kernel_selector::SoftmaxKernel_fb::Validate(const Params& params, const opt const auto& softmax_params = 
static_cast(params); - auto kd = Parent::SetDefault(softmax_params, o); - auto local_mem_per_wi = 2 * (kd.fp16UnitUsed ? sizeof(short) : sizeof(float)); + auto local_mem_per_wi = 2 * BytesPerElement(softmax_params.inputs[0].GetDType()); auto max_lws = static_cast( std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.cpp index 4af7fc0..3d6cdef 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.cpp @@ -24,7 +24,7 @@ ParamsKey SoftmaxKerneItemsClassOptimized::GetSupportedKey() const { return GetD SoftmaxKerneItemsClassOptimized::Parent::DispatchData SoftmaxKerneItemsClassOptimized::SetDefault( const softmax_params& params, const optional_params& optParams) const { - auto runInfo = Parent::SetDefault(params, optParams); + auto dispatchData = Parent::SetDefault(params, optParams); auto& input = params.inputs[0]; @@ -50,30 +50,27 @@ SoftmaxKerneItemsClassOptimized::Parent::DispatchData SoftmaxKerneItemsClassOpti break; } - runInfo.gws0 = global[0]; - runInfo.gws1 = - global[1] * workitems_per_classes; // we multiply it by workitems_per_classes because we split computations of - // one "full item classes output" into multiple workitems by "full item - // classes output" i mean N outputs where N is number of item classes. 
- runInfo.gws2 = global[2]; + dispatchData.gws[0] = global[0]; + dispatchData.gws[1] = global[1] * workitems_per_classes; // we multiply it by workitems_per_classes because we split computations of + // one "full item classes output" into multiple workitems by "full item + // classes output" i mean N outputs where N is number of item classes. + dispatchData.gws[2] = global[2]; - runInfo.lws0 = 1; - runInfo.lws1 = workitems_per_classes; - runInfo.lws2 = 1; + dispatchData.lws = { 1, workitems_per_classes, 1 }; - runInfo.leftovers = item_class_count % workitems_per_classes; + dispatchData.leftovers = item_class_count % workitems_per_classes; if (item_class_count >= 32) { - runInfo.efficiency = FORCE_PRIORITY_7; + dispatchData.efficiency = FORCE_PRIORITY_7; } else { - runInfo.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; } - return runInfo; + return dispatchData; } -JitConstants SoftmaxKerneItemsClassOptimized::GetJitConstants(const softmax_params& params, DispatchData kd) const { - auto jit = SoftmaxItemsClassKernelBase::GetJitConstants(params, kd); +JitConstants SoftmaxKerneItemsClassOptimized::GetJitConstants(const softmax_params& params, DispatchData dispatchData) const { + auto jit = SoftmaxItemsClassKernelBase::GetJitConstants(params, dispatchData); jit.AddConstant(MakeJitConstant("WORKITEMS_PER_CLASSES", workitems_per_classes)); jit.AddConstant(MakeJitConstant("HAS_DRIVER_PROBLEMS", params.engineInfo.bIMADSupport)); @@ -84,4 +81,4 @@ KernelsData SoftmaxKerneItemsClassOptimized::GetKernelsData(const Params& params const optional_params& options) const { return GetCommonKernelsData(params, options); } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h index 2dfb35a..354f28b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h @@ -27,7 +27,7 @@ public: ParamsKey GetSupportedKey() const override; protected: - JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const override; + JitConstants GetJitConstants(const softmax_params& params, DispatchData dispatchData) const override; DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.cpp index 809b9e5..8a3f7ec 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.cpp @@ -20,25 +20,17 @@ ParamsKey SoftmaxKernelRef::GetSupportedKey() const { return GetDefaultSupported SoftmaxKernelRef::Parent::DispatchData SoftmaxKernelRef::SetDefault(const softmax_params& params, const optional_params& optParams) const { - auto runInfo = Parent::SetDefault(params, optParams); + auto dispatchData = Parent::SetDefault(params, optParams); - const auto global = GetSoftmaxDimGlobalSizes(params.dim, params.output); + dispatchData.gws = GetSoftmaxDimGlobalSizes(params.dim, params.output); - assert(global.size() == 3); + assert(dispatchData.gws.size() == 3); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + 
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; + dispatchData.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - runInfo.efficiency = DONT_USE_IF_HAVE_SOMETHING_ELSE; - - return runInfo; + return dispatchData; } KernelsData SoftmaxKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_batch/space_to_batch_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_batch/space_to_batch_kernel_base.cpp index 169ff62..1b21b66 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_batch/space_to_batch_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_batch/space_to_batch_kernel_base.cpp @@ -41,27 +41,16 @@ bool SpaceToBatchKernelBase::Validate(const Params& p, const optional_params& o) CommonDispatchData SpaceToBatchKernelBase::SetDefault(const space_to_batch_params& params, const optional_params&) const { const auto& out = params.output; - CommonDispatchData runInfo; - std::vector global; - std::vector local; - + CommonDispatchData dispatchData; if (out.GetLayout() == DataLayout::b_fs_yx_fsv16 && out.Feature().v % 16 == 0) { - global = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v }; - local = {1, 16, 1}; + dispatchData.gws = { out.Batch().v, out.Feature().v, out.Y().v * out.X().v }; + dispatchData.lws = {1, 16, 1}; } else { - global = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v }; - local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + dispatchData.gws = { out.Batch().v, out.Feature().v, out.W().v * out.Z().v * out.Y().v * out.X().v }; + dispatchData.lws = 
GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); } - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants SpaceToBatchKernelBase::GetJitConstants(const space_to_batch_params& params) const { @@ -101,14 +90,14 @@ KernelsData SpaceToBatchKernelBase::GetCommonKernelsData(const Params& params, c return {}; } - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 1, GetFusedPrimitiveInputsCount(params)); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_depth/space_to_depth_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_depth/space_to_depth_kernel_ref.cpp index 8a0b228..23337fb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_depth/space_to_depth_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/space_to_depth/space_to_depth_kernel_ref.cpp @@ -59,23 +59,14 @@ bool SpaceToDepthKernelRef::Validate(const Params& p, const optional_params& o) CommonDispatchData SpaceToDepthKernelRef::SetDefault(const space_to_depth_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; - std::vector global = {params.output.Batch().v, - params.output.Feature().v, - params.output.Z().v * 
params.output.Y().v * params.output.X().v}; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Z().v * params.output.Y().v * params.output.X().v }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; - - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; - - return runInfo; + return dispatchData; } JitConstants SpaceToDepthKernelRef::GetJitConstants(const space_to_depth_params& params) const { @@ -111,14 +102,14 @@ KernelsData SpaceToDepthKernelRef::GetKernelsData(const Params& params, const op return {}; } - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp index b5b0acf..5c3bbcc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp @@ -89,25 +89,18 @@ bool StridedSliceKernelRef::Validate(const Params& p, const optional_params& o) } 
CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; // If the new_axis_mask is set, then begin, end, and stride are ignored // and a new length 1 dimension is adding. Input data just copying to output // TODO: remove data copying in case where only shape size changing - std::vector gws = {params.output.Batch().v, params.output.Feature().v, - params.output.Z().v * params.output.Y().v * params.output.X().v}; + dispatchData.gws = { params.output.Batch().v, + params.output.Feature().v, + params.output.Z().v * params.output.Y().v * params.output.X().v }; - auto lws = GetOptimalLocalWorkGroupSizes(gws, params.engineInfo); + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - runInfo.gws0 = gws[0]; - runInfo.gws1 = gws[1]; - runInfo.gws2 = gws[2]; - - runInfo.lws0 = lws[0]; - runInfo.lws1 = lws[1]; - runInfo.lws2 = lws[2]; - - return runInfo; + return dispatchData; } JitConstants StridedSliceKernelRef::GetJitConstants(const strided_slice_params& params) const { @@ -167,14 +160,14 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op assert(params.GetType() == KernelType::STRIDED_SLICE); - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp index dc95efd..de2a1e9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2018-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ ParamsKey TileKernelRef::GetSupportedKey() const { } CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const { - CommonDispatchData runInfo; + CommonDispatchData dispatchData; auto in = params.inputs[0]; @@ -77,26 +77,24 @@ CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const op } if (inner_size > 1) { - runInfo.gws0 = outer_size; - runInfo.gws1 = inner_size; - runInfo.gws2 = 1; + dispatchData.gws[0] = outer_size; + dispatchData.gws[1] = inner_size; + dispatchData.gws[2] = 1; - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 1; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } else { - runInfo.gws0 = Align(outer_size, 16); - runInfo.gws1 = 1; - runInfo.gws2 = 1; + dispatchData.gws[0] = Align(outer_size, 16); + dispatchData.gws[1] = 1; + dispatchData.gws[2] = 1; - runInfo.lws0 = 16; - runInfo.lws1 = 1; - runInfo.lws2 = 1; + dispatchData.lws[0] = 16; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; } - runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; - - return runInfo; + return dispatchData; } JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const { @@ -135,14 +133,14 @@ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_p KernelData kd = KernelData::Default(params); tile_params& newParams = 
*static_cast(kd.params.get()); - auto runInfo = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto cldnn_jit = GetJitConstants(newParams); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp index a6f3bb2..4bf514b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp @@ -160,30 +160,6 @@ std::shared_ptr common_kernel_base::GetKernelString(const std::str return kernel_string; } -static void Check_RunInfoData(const std::string& kernelName, const kernel_selector::CommonDispatchData& runInfo) { - if (runInfo.lws0 * runInfo.lws1 * runInfo.lws2 > 256) { - std::cout << "ERROR: dispatch data for kernel: " << kernelName << " LWS cannot be greater than 256!\n" - << std::endl; - } - if (runInfo.gws0 == 0 || runInfo.gws1 == 0 || runInfo.gws2 == 0 || runInfo.lws0 == 0 || runInfo.lws1 == 0 || - runInfo.lws2 == 0) { - std::cout << "ERROR: dispatch data for kernel: " << kernelName << " dispatch data cannot contain zeros!" 
- << std::endl; - } - if (runInfo.gws0 % runInfo.lws0 != 0) { - std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS0: " << runInfo.gws0 - << " LWS0: " << runInfo.lws0 << std::endl; - } - if (runInfo.gws1 % runInfo.lws1 != 0) { - std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS1: " << runInfo.gws1 - << " LWS1: " << runInfo.lws1 << std::endl; - } - if (runInfo.gws2 % runInfo.lws2 != 0) { - std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS2: " << runInfo.gws2 - << " LWS2: " << runInfo.lws2 << std::endl; - } -} - uint32_t common_kernel_base::GetFusedPrimitiveInputsCount(const Params ¶ms) const { auto p = dynamic_cast(params); uint32_t fused_deps_total = 0; @@ -195,7 +171,7 @@ uint32_t common_kernel_base::GetFusedPrimitiveInputsCount(const Params ¶ms) } void common_kernel_base::FillCLKernelData(clKernelData& kernel, - const CommonDispatchData& runInfo, + const CommonDispatchData& dispatchData, const EngineInfo& engine_info, const std::string& kernelMapName, const std::string& jit, @@ -205,11 +181,10 @@ void common_kernel_base::FillCLKernelData(clKernelData& kernel, bool bias, int number_of_inputs, uint32_t number_of_inputs_for_fused_prims) const { - Check_RunInfoData(kernelMapName, runInfo); - kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2}; - kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2}; + CheckDispatchData(kernelMapName, dispatchData); + kernel.workGroups.global = dispatchData.gws; + kernel.workGroups.local = dispatchData.lws; kernel.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode); - kernel.arguments = - GetArgsDesc(number_of_inputs, weights, bias, number_of_inputs_for_fused_prims); + kernel.arguments = GetArgsDesc(number_of_inputs, weights, bias, number_of_inputs_for_fused_prims); } } // namespace kernel_selector diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h index 3dc1c5f..cb89cc1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h @@ -21,16 +21,6 @@ #include namespace kernel_selector { -struct CommonDispatchData { - // TODO: change it to std::vector - size_t gws0, gws1, gws2; - size_t lws0, lws1, lws2; - bool - fp16UnitUsed; ///< Value indicating that FP16 half precision floating point type will be used (instead of single precision). - float efficiency; - - CommonDispatchData() : gws0(0), gws1(0), gws2(0), lws0(0), lws1(0), lws2(0), fp16UnitUsed(false), efficiency(0.0f){} -}; class common_kernel_base : public KernelBase { public: @@ -58,7 +48,7 @@ protected: uint32_t GetFusedPrimitiveInputsCount(const Params ¶ms) const; void FillCLKernelData(clKernelData& kernel, - const CommonDispatchData& runInfo, + const CommonDispatchData& dispatchData, const EngineInfo& engine_info, const std::string& kernel_map_name, const std::string& jit, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index d566440..ae87204 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -475,14 +475,16 @@ class WeightTensorJitConstant : public TensorBaseTJitConstant; - if (l == WeightsLayout::oiyx || l == WeightsLayout::oizyx || l == WeightsLayout::goiyx || + if (l == WeightsLayout::oiyx || + l == WeightsLayout::oizyx || + l == WeightsLayout::goiyx || l == WeightsLayout::goizyx) { args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x"}; - const auto name = toString(l); - this->calcFunction = FuncBody(name); - this->macroName = 
MacroName(name, macroNameArgs); + this->calcFunction = FuncBody(layout_name); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); this->macroBody = R"V0G0N( \ CAT(prefix, _OFFSET) + \ (x)*CAT(prefix, _X_PITCH) + \ @@ -495,9 +497,8 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantcalcFunction = FuncBody(name); - this->macroName = MacroName(name, macroNameArgs); + this->calcFunction = FuncBody(layout_name); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); this->macroBody = R"V0G0N( \ CAT(prefix, _OFFSET) + \ (g)*CAT(prefix, _GROUPS_PITCH) + \ @@ -515,9 +516,8 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantcalcFunction = FuncBody(name); - this->macroName = MacroName(name, macroNameArgs); + this->calcFunction = FuncBody(layout_name); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); this->macroBody = R"V0G0N( \ CAT(prefix, _OFFSET) + \ (g * CAT(prefix, _GROUPS_PITCH)) + \ @@ -532,9 +532,8 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantcalcFunction = FuncBody(name); - this->macroName = MacroName(name, macroNameArgs); + this->calcFunction = FuncBody(layout_name); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); this->macroBody = R"V0G0N( \ CAT(prefix, _OFFSET) + \ (g)*CAT(prefix, _GROUPS_PITCH) + \ @@ -552,7 +551,6 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantmacroName = MacroName(name, macroNameArgs); - this->calcFunction = FuncBody(name, funcArgs, body); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); + this->calcFunction = FuncBody(layout_name, funcArgs, body); if (l == WeightsLayout::os_is_yx_osv16_isv16) - this->macroBody = FuncCall(name, {"o", "i", "0", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), "1", Cat("_IFM_NUM"), Cat("_OFM_NUM"), "16", "16"}); + this->macroBody = FuncCall(layout_name, {"o", "i", "0", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), "1", Cat("_IFM_NUM"), Cat("_OFM_NUM"), "16", 
"16"}); else if (l == WeightsLayout::os_is_zyx_osv32_isv16) - this->macroBody = FuncCall(name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "32", "16"}); + this->macroBody = FuncCall(layout_name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "32", "16"}); else if (l == WeightsLayout::os_is_zyx_osv64_isv16) - this->macroBody = FuncCall(name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "64", "16"}); + this->macroBody = FuncCall(layout_name, {"o", "i", "z", "y", "x", Cat("_SIZE_X"), Cat("_SIZE_Y"), Cat("_SIZE_Z"), Cat("_IFM_NUM"), Cat("_OFM_NUM"), "64", "16"}); } else if (l == WeightsLayout::g_os_zyx_is_osv16_isv16 || l == WeightsLayout::g_os_zyx_is_osv16_isv32 || l == WeightsLayout::g_os_zyx_is_osv32_isv16 || l == WeightsLayout::g_os_zyx_is_osv32_isv32) { args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x"}; args funcArgs = {"g", "o", "i", "z", "y", "x", "g_size", "o_size", "i_size", "z_size", "y_size", "x_size", "osv", "isv"}; - const auto name = toString(l); const auto body = R"V0G0N( \ uint is_size = (i_size + isv - 1) / isv; \ uint os_size = (o_size + osv - 1) / osv; \ @@ -612,8 +609,8 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantmacroName = MacroName(name, macroNameArgs); - this->calcFunction = FuncBody(name, funcArgs, body); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); + this->calcFunction = FuncBody(layout_name, funcArgs, body); std::string osv = "16", isv = "16"; if (l == WeightsLayout::g_os_zyx_is_osv16_isv16) { osv = "16"; isv = "16"; @@ -624,12 +621,11 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantmacroBody = FuncCall(name, {"g", "o", "i", "z", "y", "x", Cat("_GROUPS_NUM"), Cat("_OFM_NUM"), Cat("_IFM_NUM"), Cat("_SIZE_Z"), - Cat("_SIZE_Y"), Cat("_SIZE_X"), osv, isv}); + this->macroBody = 
FuncCall(layout_name, {"g", "o", "i", "z", "y", "x", Cat("_GROUPS_NUM"), Cat("_OFM_NUM"), Cat("_IFM_NUM"), Cat("_SIZE_Z"), + Cat("_SIZE_Y"), Cat("_SIZE_X"), osv, isv}); } else if (l == WeightsLayout::os_is_yx_osv16_isv4 || l == WeightsLayout::os_is_yx_osv32_isv4) { args macroNameArgs = {"prefix", "o", "i", "y", "x"}; args funcArgs = {"o", "i", "y", "x", "i_size", "o_size", "x_size", "otd"}; - const auto name = toString(l); const auto body = R"V0G0N( \ uint out_depth_tile = o / otd; \ uint od = o - out_depth_tile * otd; \ @@ -644,12 +640,12 @@ class WeightTensorJitConstant : public TensorBaseTJitConstantmacroName = MacroName(name, macroNameArgs); - this->calcFunction = FuncBody(name, funcArgs, body); + this->macroName = MacroName(tensor_name, layout_name, macroNameArgs); + this->calcFunction = FuncBody(layout_name, funcArgs, body); if (l == WeightsLayout::os_is_yx_osv16_isv4) - this->macroBody = FuncCall(name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "16"}); + this->macroBody = FuncCall(layout_name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "16"}); else if (l == WeightsLayout::os_is_yx_osv32_isv4) - this->macroBody = FuncCall(name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "32"}); + this->macroBody = FuncCall(layout_name, {"o", "i", "y", "x", Cat("_IFM_PITCH"), Cat("_OFM_PITCH"), Cat("_SIZE_X"), "32"}); } else { // throw error? } @@ -667,12 +663,12 @@ class WeightTensorJitConstant : public TensorBaseTJitConstant args) { + static const std::string MacroName(std::string tensor_name, std::string layout_name, std::initializer_list args) { std::string args_str = ""; size_t counter = 0; for (auto& arg : args) args_str += (++counter == args.size()) ? 
(arg) : (arg + ", "); - return "GET_WEIGHTS_" + name + "_INDEX(" + args_str + ")"; + return "GET_" + tensor_name + "_" + layout_name + "_INDEX(" + args_str + ")"; } static const std::string FuncBody(std::string name, std::initializer_list args = {}, std::string body = "return 0;") { @@ -727,7 +723,9 @@ JitDefinitions WeightTensorJitConstant::GetDefinitions() const { std::string index_func_val; auto layout = _tensor.GetLayout(); - WeightIndexFuncDesc indexFuncDesc {layout}; + auto layout_str = toString(layout); + WeightIndexFuncDesc indexFuncDesc{_name, layout}; + std::string called_func_name = "GET_" + _name + "_" + layout_str + "_INDEX"; if (WeightsTensor::DoesGroupDimExist(layout)) { if (WeightsTensor::ChannelsCount(layout) <= 5) { std::vector grouped_4d_channels = { @@ -740,15 +738,14 @@ JitDefinitions WeightTensorJitConstant::GetDefinitions() const { bool is_grouped_4d_layout = is_common_nd_layout(grouped_4d_channels, layout); if (is_grouped_4d_layout) { index_macro_name = _name + "_GET_INDEX(g, o, i, y, x)"; - auto layout_str = toString(layout); if (layout == WeightsLayout::goiyx) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x)"; else if (layout == WeightsLayout::g_os_is_yx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x, 16)"; else if (layout == WeightsLayout::g_os_iyx_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, y, x, 16)"; else if (layout == WeightsLayout::g_is_os_yx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x, 16)"; } else { assert(0); } @@ -764,13 +761,12 @@ 
JitDefinitions WeightTensorJitConstant::GetDefinitions() const { bool is_grouped_5d_layout = is_common_nd_layout(grouped_5d_channels, layout); if (is_grouped_5d_layout) { index_macro_name = _name + "_GET_INDEX(g, o, i, z, y, x)"; - auto layout_str = toString(layout); if (layout == WeightsLayout::goizyx) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, z, y, x)"; else if (layout == WeightsLayout::g_os_is_zyx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, z, y, x, 16)"; else if (layout == WeightsLayout::g_is_os_zyx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", g, o, i, z, y, x, 16)"; } else { assert(0); } @@ -786,19 +782,18 @@ JitDefinitions WeightTensorJitConstant::GetDefinitions() const { bool is_common_4d_layout = is_common_nd_layout(base_4d_channels, layout); if (is_common_4d_layout) { index_macro_name = _name + "_GET_INDEX(o, i, y, x)"; - auto layout_str = toString(layout); if (layout == WeightsLayout::oiyx) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, 0, y, x)"; else if (layout == WeightsLayout::os_is_yx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, 0, y, x, 16)"; else if (layout == WeightsLayout::os_iyx_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, y, x, 16)"; else if (layout == WeightsLayout::os_iyx_osv32 || layout == WeightsLayout::os_iyx_osv32__ai32) - 
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, y, x, 32)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, y, x, 32)"; else if (layout == WeightsLayout::is_os_yx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, 0, y, x, 16)"; else if (layout == WeightsLayout::os_is_yx_osv16_isv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", o, i, 0, y, x)"; + index_func_val = called_func_name + "(" + _name + ", o, i, 0, y, x)"; } else { assert(0); } @@ -813,15 +808,14 @@ JitDefinitions WeightTensorJitConstant::GetDefinitions() const { bool is_common_5d_layout = is_common_nd_layout(base_5d_channels, layout); if (is_common_5d_layout) { index_macro_name = _name + "_GET_INDEX(o, i, z, y, x)"; - auto layout_str = toString(layout); if (layout == WeightsLayout::oizyx) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, z, y, x)"; else if (layout == WeightsLayout::os_is_zyx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, z, y, x, 16)"; else if (layout == WeightsLayout::is_os_zyx_isv16_osv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x, 16)"; + index_func_val = called_func_name + "(" + _name + ", 0, o, i, z, y, x, 16)"; else if (layout == WeightsLayout::os_is_zyx_osv32_isv16 || layout == WeightsLayout::os_is_zyx_osv64_isv16) - index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", o, i, z, y, x)"; + index_func_val = called_func_name + "(" + _name + ", o, i, z, y, x)"; } else { assert(0); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp index 3d23771..acb26f7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp @@ -15,10 +15,51 @@ #include "kernel_base.h" +#include <sstream> + namespace kernel_selector { const primitive_db KernelBase::db; thread_local size_t KernelBase::counter = 0; +std::string toString(const kernel_selector::CommonDispatchData& dispatchData) { + auto gws = dispatchData.gws; + auto lws = dispatchData.lws; + std::stringstream os; + os << "GWS(" << gws.size() << "): "; + for (auto e : gws) { + os << e << " "; + } + os << "LWS(" << lws.size() << "): "; + for (auto e : lws) { + os << e << " "; + } + return os.str(); +} + +void KernelBase::CheckDispatchData(const std::string& kernelName, const kernel_selector::CommonDispatchData& dispatchData) { + if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3) + throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName + ": " + + ": LWS and GWS size is expected to be equal to 3. Actual: " + + toString(dispatchData)); + + if (dispatchData.lws[0] * dispatchData.lws[1] * dispatchData.lws[2] > 256) { + throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName + + ": LWS cannot be greater than 256. Actual: " + + toString(dispatchData)); + } + for (size_t i = 0; i < dispatchData.gws.size(); i++) { + if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0) + throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName + + ": Dispatch data cannot contain zeros. Actual: " + + toString(dispatchData)); + + if (dispatchData.gws[i] % dispatchData.lws[i] != 0) + throw std::runtime_error("ERROR: Invalid dispatch data for kernel: " + kernelName + + ": GWS must be divisible by corresponding LWS. 
Actual: " + + toString(dispatchData)); + } +} + static bool IsTypeUsedIn(Datatype type, const base_params& params) { return params.output.GetDType() == type || std::any_of(params.inputs.begin(), params.inputs.end(), [=](const DataTensor& input) -> bool { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h index 3b2aa55..a795b5f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h @@ -26,6 +26,20 @@ namespace kernel_selector { using primitive_db = kernel_selector::gpu::cache::primitive_db; +struct CommonDispatchData { + std::vector<size_t> gws; + std::vector<size_t> lws; + float efficiency; + + CommonDispatchData() : gws({0, 0, 0}), lws({0, 0, 0}), efficiency(0.0f) {} +}; + +std::string toString(const kernel_selector::CommonDispatchData& dispatchData); + +static inline std::ostream &operator<<(std::ostream &os, CommonDispatchData disptchData) { + return os << toString(disptchData); +} + class KernelBase { public: using FusedOpType = KernelType; @@ -56,6 +70,7 @@ protected: static const primitive_db db; const std::string kernelName; + static void CheckDispatchData(const std::string& kernelName, const kernel_selector::CommonDispatchData& dispatchData); static size_t UniqeID() { return counter++; } // TODO: use interlocked virtual Datatype GetUnitType(const base_params& params) const;