From a05333217cb08e8d8a9c52803785b80a8995b12c Mon Sep 17 00:00:00 2001 From: Egor Churaev Date: Thu, 1 Oct 2020 11:41:51 +0300 Subject: [PATCH] Support operation Interpolate-4 in OpenVINO (#1596) JIRA: 26973 --- .../src/cldnn_engine/cldnn_program.cpp | 213 +++- inference-engine/src/cldnn_engine/cldnn_program.h | 2 + .../single_layer_tests/interpolate.cpp | 25 +- .../plugin/cpu/single_layer_tests/interpolate.cpp | 35 +- .../single_layer_tests/interpolate.cpp | 125 +++ .../shared_tests_instances/skip_tests_config.cpp | 4 +- .../include/single_layer_tests/interpolate.hpp | 5 +- .../shared/src/single_layer_tests/interpolate.cpp | 28 +- .../ngraph_functions/utils/ngraph_helpers.hpp | 2 + .../ngraph_functions/src/utils/ngraph_helpers.cpp | 14 + inference-engine/thirdparty/clDNN/api/resample.hpp | 134 ++- .../clDNN/kernel_selector/common/common_types.h | 32 + .../resample/resample_kernel_base.cpp | 117 +- .../actual_kernels/resample/resample_kernel_base.h | 13 +- .../resample/resample_kernel_ref.cpp | 1 + .../core/cl_kernels/resample_opt.cl | 8 +- .../core/cl_kernels/resample_ref.cl | 415 +++++-- .../core/kernel_selector_common.cpp | 24 + .../kernel_selector/core/kernel_selector_common.h | 2 + .../thirdparty/clDNN/src/gpu/resample_gpu.cpp | 81 +- .../clDNN/src/include/kernel_selector_helper.h | 4 + inference-engine/thirdparty/clDNN/src/resample.cpp | 54 +- .../clDNN/tests/test_cases/resample_gpu_test.cpp | 1127 +++++++++++++++++++- .../ngraph/runtime/reference/interpolate.hpp | 2 +- 24 files changed, 2273 insertions(+), 194 deletions(-) create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index d6405ed..b78288b 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -770,6 +770,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) { { "Copy" , Copy }, { "Resample" , Resample }, { "Interp" , Interp }, + { "Interpolate" , Interpolate }, { "RegionYolo" , RegionYolo }, { "ReorgYolo" , ReorgYolo }, { "Const" , ConstantBlob }, @@ -1469,6 +1470,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng break; case Interp: CreateInterpPrimitive(topology, layer); break; + case Interpolate: CreateInterpolatePrimitive(topology, layer); + break; case ArgMax: case ArgMin: CreateArgMaxMinPrimitive(topology, layer, LayerTypeFromStr(layer->type)); @@ -3342,8 +3345,14 @@ void Program::CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine:: auto outDims = layer->outData[0]->getTensorDesc().getDims(); auto outTensor = CldnnTensorFromIEDims(outDims); + std::vector pads_begin(outDims.size(), 0); + std::vector pads_end(outDims.size(), 0); int pad_begin = interpLayer->GetParamAsInt("pad_beg_", 0); int pad_end = interpLayer->GetParamAsInt("pad_end_", 0); + for (size_t i = 2; i < pads_begin.size(); ++i) { + pads_begin[i] = pad_begin; + pads_end[i] = pad_end; + } int align_corners = interpLayer->GetParamAsInt("align_corners", 1); std::string resampleLayerName = layer_type_name_ID(layer); @@ -3352,8 +3361,8 @@ void Program::CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine:: resampleLayerName, inputPrimitives[0], outTensor, - pad_begin, - pad_end, + pads_begin, + pads_end, align_corners, cldnn::resample_type::bilinear); @@ -3361,6 +3370,202 @@ void Program::CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine:: AddPrimitiveToProfiler(resampleLayerName, layer); } +static cldnn::coordinate_transformation_mode CoordinateTransformationModeFromString(const std::string &str) { + static const caseless_map CoordTransformationMode = { + { "half_pixel" , cldnn::coordinate_transformation_mode::half_pixel }, + { "pytorch_half_pixel" , cldnn::coordinate_transformation_mode::pytorch_half_pixel }, + { "asymmetric" , cldnn::coordinate_transformation_mode::asymmetric }, + { "tf_half_pixel_for_nn" , cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn }, + { "align_corners" , cldnn::coordinate_transformation_mode::align_corners }, + }; + auto it = CoordTransformationMode.find(str); + if (it != CoordTransformationMode.end()) + return it->second; + else + THROW_CLDNN_EXCEPTION("Unknown coordinate transformation mode: " << str); +} + +static cldnn::nearest_mode NearestModeFromString(const std::string &str) { + static const caseless_map NearestMode = { + { "round_prefer_floor" , cldnn::nearest_mode::round_prefer_floor }, + { "round_prefer_ceil" , cldnn::nearest_mode::round_prefer_ceil }, + { "floor" , cldnn::nearest_mode::floor }, + { "ceil" , cldnn::nearest_mode::ceil }, + { "simple" , cldnn::nearest_mode::simple }, + }; + auto it = NearestMode.find(str); + if (it != NearestMode.end()) + return it->second; + else + THROW_CLDNN_EXCEPTION("Unknown nearest mode: " << str); +} + +static cldnn::shape_calculation_mode ShapeCalculationModeFromString(const std::string &str) { + static const caseless_map shapeCalcMode = { + { "sizes" , cldnn::shape_calculation_mode::sizes }, + { "scales" , cldnn::shape_calculation_mode::scales }, + }; + auto it = shapeCalcMode.find(str); + if (it != shapeCalcMode.end()) + return it->second; + else + THROW_CLDNN_EXCEPTION("Unknown shape calculation mode: " << str); +} + +inline cldnn::resample::resample_axis InterpolateAxisFromIEAxis(int axis, unsigned sz) { + if (axis < 0) + axis += sz; + if (axis < 0 || axis >= sz) + THROW_CLDNN_EXCEPTION("Interpolate axis is not correspond to number of dimensions"); + + // Difference in dimension ordering between IE and clDNN, + // reverse spatial dimensions after batch and feature. + unsigned cldnn_axis = axis; + if (axis >= 2) { + auto spatial_axis = axis - 2; + // Default and minimum number of dimensions is 4 + auto spatial_size = std::max(sz, 4u) - 2; + cldnn_axis = spatial_size - spatial_axis - 1 + 2; + } + + switch (cldnn_axis) { + case 0: + return cldnn::resample::resample_axis::along_b; + case 1: + return cldnn::resample::resample_axis::along_f; + case 2: + return cldnn::resample::resample_axis::along_x; + case 3: + return cldnn::resample::resample_axis::along_y; + case 4: + return cldnn::resample::resample_axis::along_z; + case 5: + return cldnn::resample::resample_axis::along_w; + default: + break; + } + THROW_CLDNN_EXCEPTION("Unsupported Interpolate axis: " << axis); +} + +void Program::CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, {3, 4}); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto interpolateLayer = as (layer); + + std::shared_ptr insData0 = layer->insData[0].lock(); + IE_ASSERT(insData0 != nullptr); + auto insData0dims = insData0->getTensorDesc().getDims(); + auto outDims = layer->outData[0]->getTensorDesc().getDims(); + auto outTensor = CldnnTensorFromIEDims(outDims); + + auto pads_begin = interpolateLayer->GetParamAsInts("pads_begin", {}); + auto pads_end = interpolateLayer->GetParamAsInts("pads_end", {}); + for (size_t i = pads_begin.size(); i < outDims.size() || i < 4; ++i) + pads_begin.push_back(0); + for (size_t i = pads_end.size(); i < outDims.size() || i < 4; ++i) + pads_end.push_back(0); + std::string mode = interpolateLayer->GetParamAsString("mode"); + std::string shape_calc_mode = interpolateLayer->GetParamAsString("shape_calculation_mode"); + std::string coordinate_trans_mode = interpolateLayer->GetParamAsString("coordinate_transformation_mode", "half_pixel"); + std::string nearest_mode = interpolateLayer->GetParamAsString("nearest_mode", "round_prefer_floor"); + int antialias = interpolateLayer->GetParamAsBool("antialias", false); + float cube_coeff = interpolateLayer->GetParamAsFloat("cube_coeff", -0.75f); + + std::string resampleLayerName = layer_type_name_ID(layer); + auto cldnnSampleType = ResampleTypeFromString(mode); + auto shapeCalcMode = ShapeCalculationModeFromString(shape_calc_mode); + auto coordTransMode = CoordinateTransformationModeFromString(coordinate_trans_mode); + auto nearestMode = NearestModeFromString(nearest_mode); + + std::vector scales; + auto scalesInput = layer->insData[2].lock(); + auto scalesInputCreator = getCreatorLayer(scalesInput).lock(); + if (scalesInputCreator->blobs.size() == 1) { + auto constantBlob = scalesInputCreator->blobs.begin()->second; + auto axesPrecision = constantBlob->getTensorDesc().getPrecision(); + if (axesPrecision == InferenceEngine::Precision::FP32) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + scales.push_back(data[i]); + } else { + THROW_IE_EXCEPTION << layer->name << " Incorrect scales input precision"; + } + } + + std::vector axes; + if (inputPrimitives.size() == 4) { + auto axesInput = layer->insData[3].lock(); + auto axesInputCreator = getCreatorLayer(axesInput).lock(); + if (axesInputCreator->blobs.size() == 1) { + auto constantBlob = axesInputCreator->blobs.begin()->second; + auto axesPrecision = constantBlob->getTensorDesc().getPrecision(); + if (axesPrecision == InferenceEngine::Precision::I32) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + axes.push_back(InterpolateAxisFromIEAxis(data[i], insData0dims.size())); + } else if (axesPrecision == InferenceEngine::Precision::I64) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + axes.push_back(InterpolateAxisFromIEAxis(static_cast(data[i]), insData0dims.size())); + } else { + THROW_IE_EXCEPTION << layer->name + << " Incorrect axes input precision"; + } + } + } else { + for (int i = 0; i < insData0dims.size(); ++i) { + axes.push_back(InterpolateAxisFromIEAxis(i, insData0dims.size())); + } + } + + if (axes.size() != scales.size()) + THROW_IE_EXCEPTION << layer->name << " Incorrect axes and scales should be the same size"; + + cldnn::resample::AxesAndScales axesAndScales; + for (size_t i = 0; i < axes.size(); ++i) { + axesAndScales[axes[i]] = scales[i]; + } + + if (cldnnSampleType == cldnn::resample_type::linear_onnx) { + if (insData0dims.size() != 2 && insData0dims.size() != 4) + THROW_CLDNN_EXCEPTION("mode 'linear_onnx' supports only 2D or 4D tensors"); + if (axes.size() != 2 && insData0dims.size() != axes.size()) + THROW_CLDNN_EXCEPTION("mode 'linear_onnx' supports only axes with size 2 or equal to input rank"); + bool correctAxes = + ((axes[0] == cldnn::resample::resample_axis::along_b) && + (axes[1] == cldnn::resample::resample_axis::along_f)) || + ((axes[0] == cldnn::resample::resample_axis::along_y) && + (axes[1] == cldnn::resample::resample_axis::along_x)); + if (axes.size() == 4 && insData0dims.size() == 4) { + correctAxes = axes[0] == cldnn::resample::resample_axis::along_b && + axes[1] == cldnn::resample::resample_axis::along_f && + axes[2] == cldnn::resample::resample_axis::along_y && + axes[3] == cldnn::resample::resample_axis::along_x; + } + if (!correctAxes) + THROW_CLDNN_EXCEPTION( + "mode 'linear_onnx' supports only case when axes = {2, 3} or " + "axes = {0, 1} or axes = {0, 1, 2, 3}"); + } + + auto resamplePrim = cldnn::resample( + resampleLayerName, + inputPrimitives[0], + outTensor, + axesAndScales, + pads_begin, + pads_end, + antialias, + cube_coeff, + cldnnSampleType, + shapeCalcMode, + coordTransMode, + nearestMode); + + topology.add(resamplePrim); + AddPrimitiveToProfiler(resampleLayerName, layer); +} + void Program::CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { ValidateLayer(layer, 1); auto inputPrimitives = GetPrevLayersPrimitives(layer); @@ -5676,6 +5881,10 @@ cldnn::resample_type Program::ResampleTypeFromString(const std::string &str) { { "caffe.ResampleParameter.LINEAR" , cldnn::resample_type::caffe_bilinear }, { "caffe.ResampleParameter.NEAREST" , cldnn::resample_type::nearest }, { "Interp" , cldnn::resample_type::bilinear }, + { "linear" , cldnn::resample_type::caffe_bilinear }, + { "linear_onnx" , cldnn::resample_type::linear_onnx }, + { "cubic" , cldnn::resample_type::cubic }, + { "nearest" , cldnn::resample_type::nearest }, }; auto it = UpsamplingTypeNameToType.find(str); if (it != UpsamplingTypeNameToType.end()) diff --git a/inference-engine/src/cldnn_engine/cldnn_program.h b/inference-engine/src/cldnn_engine/cldnn_program.h index 989a3c6..67a466c 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.h +++ b/inference-engine/src/cldnn_engine/cldnn_program.h @@ -163,6 +163,7 @@ public: Copy, Resample, Interp, + Interpolate, RegionYolo, ReorgYolo, ConstantBlob, @@ -346,6 +347,7 @@ private: void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); void CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp index b11efc5..1c0e1c7 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp @@ -42,6 +42,11 @@ const std::vector coordina ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners, }; +const std::vector shapeCalculationMode = { + ngraph::op::v4::Interpolate::ShapeCalcMode::sizes, + ngraph::op::v4::Interpolate::ShapeCalcMode::scales, +}; + const std::vector nearestModes = { ngraph::op::v4::Interpolate::NearestMode::simple, ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor, @@ -55,7 +60,7 @@ const std::vector defaultNearestMode = }; const std::vector> pads = { - // {0, 0, 1, 1}, + {0, 0, 1, 1}, {0, 0, 0, 0}, }; @@ -69,23 +74,37 @@ const std::vector cubeCoefs = { -0.75f, }; +const std::vector> defaultAxes = { + {2, 3} +}; + +const std::vector> defaultScales = { + {1.33333f, 1.33333f} +}; + const auto interpolateCasesWithoutNearest = ::testing::Combine( ::testing::ValuesIn(modesWithoutNearest), + ::testing::ValuesIn(shapeCalculationMode), ::testing::ValuesIn(coordinateTransformModes), ::testing::ValuesIn(defaultNearestMode), ::testing::ValuesIn(antialias), ::testing::ValuesIn(pads), ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs)); + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes), + ::testing::ValuesIn(defaultScales)); const auto interpolateCases = ::testing::Combine( ::testing::ValuesIn(nearestMode), + ::testing::ValuesIn(shapeCalculationMode), ::testing::ValuesIn(coordinateTransformModes), ::testing::ValuesIn(nearestModes), ::testing::ValuesIn(antialias), ::testing::ValuesIn(pads), ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs)); + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes), + ::testing::ValuesIn(defaultScales)); INSTANTIATE_TEST_CASE_P(Interpolate_Basic, InterpolateLayerTest, ::testing::Combine( interpolateCasesWithoutNearest, diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp index bc0a1db..1904903 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp @@ -46,33 +46,37 @@ protected: std::tie(interpolateParams, netPrecision, inputShape, targetShape, targetDevice) = basicParamsSet; ngraph::op::v4::Interpolate::InterpolateMode mode; + ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode; ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode; ngraph::op::v4::Interpolate::NearestMode nearestMode; bool antialias; std::vector padBegin, padEnd; double cubeCoef; - std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams; + std::vector axes; + std::vector scales; + std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams; using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode; - ShapeCalcMode shape_calc_mode = ShapeCalcMode::sizes; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape); - std::vector scales(targetShape.size(), 1.0f); auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales); auto scalesInput = std::make_shared(scales_const); auto secondaryInput = std::make_shared(constant); - ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shape_calc_mode, padBegin, + auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes); + auto axesInput = std::make_shared(axesConst); + ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin, padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef}; auto interpolate = std::make_shared(params[0], secondaryInput, scalesInput, + axesInput, interpolateAttributes); interpolate->get_rt_info() = CPUTestsBase::setCPUInfo(inFmts, outFmts, priority); const ngraph::ResultVector results{std::make_shared(interpolate)}; @@ -126,6 +130,11 @@ const std::vector coordina ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners, }; +const std::vector shapeCalculationMode = { + ngraph::op::v4::Interpolate::ShapeCalcMode::sizes, + ngraph::op::v4::Interpolate::ShapeCalcMode::scales, +}; + const std::vector nearestModes = { ngraph::op::v4::Interpolate::NearestMode::simple, ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor, @@ -150,23 +159,37 @@ const std::vector cubeCoefs = { -0.75f, }; +const std::vector> defaultAxes = { + {2, 3} +}; + +const std::vector> defaultScales = { + {1.25f, 1.5f} +}; + const auto interpolateCasesNN = ::testing::Combine( ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), + ::testing::ValuesIn(shapeCalculationMode), ::testing::ValuesIn(coordinateTransformModes), ::testing::ValuesIn(nearestModes), ::testing::ValuesIn(antialias), ::testing::ValuesIn(pads), ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs)); + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes), + ::testing::ValuesIn(defaultScales)); const auto interpolateCasesLinearOnnx = ::testing::Combine( ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), + ::testing::ValuesIn(shapeCalculationMode), ::testing::ValuesIn(coordinateTransformModes), ::testing::ValuesIn(defNearestModes), ::testing::ValuesIn(antialias), ::testing::ValuesIn(pads), ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs)); + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes), + ::testing::ValuesIn(defaultScales)); INSTANTIATE_TEST_CASE_P(InterpolateNN_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp new file mode 100644 index 0000000..499a879 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp @@ -0,0 +1,125 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/interpolate.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector prc = { + InferenceEngine::Precision::FP16, + InferenceEngine::Precision::FP32, +}; + +const std::vector> inShapes = { + {1, 1, 23, 23}, +}; + +const std::vector> targetShapes = { + {1, 1, 46, 46}, +}; + +const std::vector modesWithoutNearest = { + ngraph::op::v4::Interpolate::InterpolateMode::linear, + ngraph::op::v4::Interpolate::InterpolateMode::cubic, + ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx, +}; + +const std::vector nearestMode = { + ngraph::op::v4::Interpolate::InterpolateMode::nearest, +}; + +const std::vector coordinateTransformModes = { + ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn, + ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel, + ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel, + ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric, + ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners, +}; + +const std::vector shapeCalculationMode = { + ngraph::op::v4::Interpolate::ShapeCalcMode::sizes, + ngraph::op::v4::Interpolate::ShapeCalcMode::scales, +}; + +const std::vector nearestModes = { + ngraph::op::v4::Interpolate::NearestMode::simple, + ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor, + ngraph::op::v4::Interpolate::NearestMode::floor, + ngraph::op::v4::Interpolate::NearestMode::ceil, + ngraph::op::v4::Interpolate::NearestMode::round_prefer_ceil, +}; + +const std::vector defaultNearestMode = { + ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor, +}; + +const std::vector> pads = { + {0, 0, 1, 1}, + {0, 0, 0, 0}, +}; + +const std::vector antialias = { +// Not enabled in Inference Engine +// true, + false, +}; + +const std::vector cubeCoefs = { + -0.75f, +}; + +const std::vector> defaultAxes = { + {0, 1, 2, 3} +}; + +const std::vector> defaultScales = { + {1.f, 1.f, 2.f, 2.f} +}; + +const auto interpolateCasesWithoutNearest = ::testing::Combine( + ::testing::ValuesIn(modesWithoutNearest), + ::testing::ValuesIn(shapeCalculationMode), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(defaultNearestMode), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads), + ::testing::ValuesIn(pads), + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes), + ::testing::ValuesIn(defaultScales)); + +const auto interpolateCasesNearesMode = ::testing::Combine( + ::testing::ValuesIn(nearestMode), + ::testing::ValuesIn(shapeCalculationMode), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(nearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads), + ::testing::ValuesIn(pads), + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes), + ::testing::ValuesIn(defaultScales)); + +INSTANTIATE_TEST_CASE_P(Interpolate_Basic, InterpolateLayerTest, ::testing::Combine( + interpolateCasesWithoutNearest, + ::testing::ValuesIn(prc), + ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(targetShapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + InterpolateLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(Interpolate_Nearest, InterpolateLayerTest, ::testing::Combine( + interpolateCasesNearesMode, + ::testing::ValuesIn(prc), + ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(targetShapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + InterpolateLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 9cd14ea..5fb4657 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -17,5 +17,7 @@ std::vector disabledTestPatterns() { R"(.*(ComparisonLayerTest).*)", // TODO: Issue: 39014 R"(.*CoreThreadingTestsWithIterations.*smoke_LoadNetwork.*)", + // TODO: Issue: 39612 + R"(.*Interpolate.*cubic.*tf_half_pixel_for_nn.*FP16.*)", }; -} \ No newline at end of file +} diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/interpolate.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/interpolate.hpp index 835d75c..17b6386 100644 --- a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/interpolate.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/interpolate.hpp @@ -18,12 +18,15 @@ namespace LayerTestsDefinitions { typedef std::tuple< ngraph::op::v4::Interpolate::InterpolateMode, // InterpolateMode + ngraph::op::v4::Interpolate::ShapeCalcMode, // ShapeCalculationMode ngraph::op::v4::Interpolate::CoordinateTransformMode, // CoordinateTransformMode ngraph::op::v4::Interpolate::NearestMode, // NearestMode bool, // AntiAlias std::vector, // PadBegin std::vector, // PadEnd - double // Cube coef + double, // Cube coef + std::vector, // Axes + std::vector // Scales > InterpolateSpecificParams; typedef std::tuple< diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/interpolate.cpp index 08aabc3..db594c8 100644 --- a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/interpolate.cpp @@ -26,22 +26,28 @@ std::string InterpolateLayerTest::getTestCaseName(testing::TestParamInfo padBegin, padEnd; + std::vector axes; + std::vector scales; bool antialias; ngraph::op::v4::Interpolate::InterpolateMode mode; + ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode; ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode; ngraph::op::v4::Interpolate::NearestMode nearestMode; double cubeCoef; - std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams; + std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams; std::ostringstream result; result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; result << "TS=" << CommonTestUtils::vec2str(targetShapes) << "_"; result << "InterpolateMode=" << mode << "_"; + result << "ShapeCalcMode=" << shapeCalcMode << "_"; result << "CoordinateTransformMode=" << coordinateTransformMode << "_"; result << "NearestMode=" << nearestMode << "_"; result << "CubeCoef=" << cubeCoef << "_"; result << "Antialias=" << antialias << "_"; result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_"; result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "Axes=" << CommonTestUtils::vec2str(axes) << "_"; + result << "Scales=" << CommonTestUtils::vec2str(scales) << "_"; result << "netPRC=" << netPrecision.name() << "_"; result << "targetDevice=" << targetDevice; return result.str(); @@ -54,33 +60,35 @@ void InterpolateLayerTest::SetUp() { std::tie(interpolateParams, netPrecision, inputShape, targetShape, targetDevice) = this->GetParam(); std::vector padBegin, padEnd; + std::vector axes; + std::vector scales; bool antialias; ngraph::op::v4::Interpolate::InterpolateMode mode; + ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode; ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode; ngraph::op::v4::Interpolate::NearestMode nearestMode; - using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode; - ShapeCalcMode shape_calc_node = ShapeCalcMode::sizes; double cubeCoef; - std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams; + std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); - auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape); + auto sizesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape); + auto sizesInput = std::make_shared(sizesConst); - std::vector scales(targetShape.size(), 1.0f); auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales); - auto scalesInput = std::make_shared(scales_const); - auto secondaryInput = std::make_shared(constant); + auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes); + auto axesInput = std::make_shared(axesConst); - ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shape_calc_node, padBegin, + ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin, padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef}; auto interpolate = std::make_shared(params[0], - secondaryInput, + sizesInput, scalesInput, + axesInput, interpolateAttributes); const ngraph::ResultVector results{std::make_shared(interpolate)}; function = std::make_shared(results, params, "interpolate"); diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp index 5dc3878..32f767c 100644 --- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp +++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp @@ -252,5 +252,7 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::Coordin std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::NearestMode type); +std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type); + } // namespace helpers } // namespace ngraph diff --git a/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp b/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp index 0e61048..4ab358e 100644 --- a/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp +++ b/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp @@ -708,5 +708,19 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::Nearest return os; } +std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type) { + switch (type) { + case ngraph::op::v4::Interpolate::ShapeCalcMode::scales: + os << "scales"; + break; + case ngraph::op::v4::Interpolate::ShapeCalcMode::sizes: + os << "sizes"; + break; + default: + throw std::runtime_error("NOT_SUPPORTED_OP_TYPE"); + } + return os; +} + } // namespace helpers } // namespace ngraph diff --git a/inference-engine/thirdparty/clDNN/api/resample.hpp b/inference-engine/thirdparty/clDNN/api/resample.hpp index 4527a35..add63e4 100644 --- a/inference-engine/thirdparty/clDNN/api/resample.hpp +++ b/inference-engine/thirdparty/clDNN/api/resample.hpp @@ -18,6 +18,8 @@ #pragma once #include "primitive.hpp" +#include + namespace cldnn { /// @addtogroup cpp_api C++ API /// @{ @@ -33,7 +35,47 @@ enum class resample_type : int32_t { /// @brief bilinear interpolation. bilinear, /// @brief caffe bilinear interpolation. - caffe_bilinear + caffe_bilinear, + /// @brief cubic interpolation. + cubic, + /// @brief linear onnx interpolation. + linear_onnx +}; + +/// @brief Specifies which of inputs target_spatial_shape or scales is used to calculate an output shape +enum class shape_calculation_mode : int32_t { + /// @brief output shape calculated based on sizes of input and output tensors + sizes, + /// @brief output shape calculated based on scales coefficients + scales +}; + +/// @brief Coordinate transformation mode for the @ref resample layer. +enum class coordinate_transformation_mode : int32_t { + /// @brief the coordinate in the original tensor axis `x` is calculated as `((x_resized + 0.5) / scale[x]) - 0.5`. + half_pixel, + /// @brief the coordinate in the original tensor axis `x` is calculated by `(x_resized + 0.5) / scale[x] - 0.5 if output_shape[x] > 1 else 0.0`. + pytorch_half_pixel, + /// @brief the coordinate in the original tensor axis `x` is calculated according to the formula `x_resized / scale[x]`. + asymmetric, + /// @brief the coordinate in the original tensor axis `x` is `(x_resized + 0.5) / scale[x]`. + tf_half_pixel_for_nn, + /// @brief the coordinate in the original tensor axis `x` is calculated as `0 if output_shape[x] == 1 else x_resized * (input_shape[x] - 1) / (output_shape[x] - 1)`. + align_corners +}; + +/// @brief Nearest mode for the @ref resample layer. +enum class nearest_mode : int32_t { + /// @brief this mode is known as round half down. + round_prefer_floor, + /// @brief it is round half up mode. + round_prefer_ceil, + /// @brief this mode computes the largest integer value not greater than rounded value. + floor, + /// @brief this mode computes the smallest integer value not less than rounded value + ceil, + /// @brief this mode behaves as `ceil` mode when `Interpolate` is downsample, and as dropping the fractional part otherwise. + simple }; /// @brief Performs nearest neighbor/bilinear resample @@ -41,6 +83,17 @@ enum class resample_type : int32_t { struct resample : public primitive_base { CLDNN_DECLARE_PRIMITIVE(resample) + enum resample_axis { + along_b, + along_f, + along_x, + along_y, + along_z, + along_w + }; + + using AxesAndScales = std::map; + /// @brief Constructs Resample primitive. /// @param id This primitive id. /// @param input Input primitive id. @@ -60,18 +113,23 @@ struct resample : public primitive_base { : primitive_base(id, {input}, output_padding), output_size(output_size), num_filter(num_filter), - pad_begin(0), - pad_end(0), align_corners(1), operation_type(operation_type), + shape_calc_mode(shape_calculation_mode::sizes), with_activation(with_activation), - activation_negative_slope(activation_slp) {} + activation_negative_slope(activation_slp), + coord_trans_mode(coordinate_transformation_mode::asymmetric), + round_mode(nearest_mode::floor) { + if (operation_type == resample_type::caffe_bilinear) { + coord_trans_mode = coordinate_transformation_mode::half_pixel; + } + } /// @brief Constructs Resample primitive with Interp operation. /// @param id This primitive id. /// @param input Input primitive id. - /// @param pad_begin Optional begin padding for input. - /// @param pad_end Optional end padding for input. + /// @param pads_begin Optional begin padding for input. + /// @param pads_end Optional end padding for input. /// @param align_corners Align corner pixels of the input and output tensors. /// @param resample_type Resample bilinear method. /// @param with_activation Enables Relu activation. @@ -79,8 +137,8 @@ struct resample : public primitive_base { resample(const primitive_id& id, const primitive_id& input, tensor output_size, - int32_t pad_begin = 0, - int32_t pad_end = 0, + std::vector pads_begin = {}, + std::vector pads_end = {}, int32_t align_corners = 1, resample_type operation_type = resample_type::bilinear, bool with_activation = false, @@ -89,29 +147,75 @@ struct resample : public primitive_base { : primitive_base(id, {input}, output_padding), output_size(output_size), num_filter(0), - pad_begin(pad_begin), - pad_end(pad_end), + pads_begin(pads_begin), + pads_end(pads_end), align_corners(align_corners), operation_type(operation_type), + shape_calc_mode(shape_calculation_mode::sizes), with_activation(with_activation), - activation_negative_slope(activation_slp) {} + activation_negative_slope(activation_slp), + coord_trans_mode(coordinate_transformation_mode::asymmetric), + round_mode(nearest_mode::floor) {} + + /// @brief Constructs Resample primitive with Interpolate operation. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param pads_begin Optional begin padding for input. + /// @param pads_end Optional end padding for input. + resample(const primitive_id& id, + const primitive_id& input, + tensor output_size, + AxesAndScales axesAndScales, + std::vector pads_begin = {}, + std::vector pads_end = {}, + int32_t antialias = 0, + float cube_coeff = -0.75f, + resample_type mode = resample_type::caffe_bilinear, + shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes, + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel, + nearest_mode nm = nearest_mode::round_prefer_floor, + const padding& output_padding = padding()) + : primitive_base(id, {input}, output_padding), + output_size(output_size), + axesAndScales(axesAndScales), + pads_begin(pads_begin), + pads_end(pads_end), + operation_type(mode), + shape_calc_mode(shape_calc_mode), + with_activation(false), + antialias(antialias), + cube_coeff(cube_coeff), + coord_trans_mode(ctm), + round_mode(nm) {} /// @param scale Resample scale. tensor output_size; /// @param num_filter Input filter. Only used by bilinear sample_type. uint32_t num_filter; - /// @param pad_begin Begin padding for input. - int32_t pad_begin; - /// @param pad_end End padding for input. - int32_t pad_end; + /// @param scales scales for spatial axes. + AxesAndScales axesAndScales; + /// @param pads_begin Begin paddings for input. + std::vector pads_begin; + /// @param pads_end End paddings for input. + std::vector pads_end; /// @param align_corners corner pixels of the input and output tensors int32_t align_corners; /// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear). resample_type operation_type; + /// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape. + shape_calculation_mode shape_calc_mode; /// @brief Enables Relu activation. bool with_activation; /// @brief Relu activation slope. float activation_negative_slope; + /// @param antialias is a flag that specifies whether to perform anti-aliasing. + int32_t antialias; + /// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic. + float cube_coeff; + /// @param specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor + coordinate_transformation_mode coord_trans_mode; + /// @param specifies round mode when mode == nearest and is used only when mode == nearest. + nearest_mode round_mode; }; /// @} /// @} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h index 5308e41..85a2793 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h @@ -378,6 +378,38 @@ enum class ResampleType { NEAREST_NEIGHBOR, BILINEAR_INTERP, CAFFE_BILINEAR_INTERP, + CUBIC, + LINEAR_ONNX, +}; + +enum class CoordinateTransformationMode { + HALF_PIXEL, + PYTORCH_HALF_PIXEL, + ASYMMETRIC, + TF_HALF_PIXEL_FOR_NN, + ALIGN_CORNERS, +}; + +enum class NearestMode { + ROUND_PREFER_FLOOR, + ROUND_PREFER_CEIL, + FLOOR, + CEIL, + SIMPLE, +}; + +enum class ShapeCalculationMode { + SIZES, + SCALES, +}; + +enum class InterpolateAxis { + X, + Y, + Z, + W, + FEATURE, + BATCH }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp index d44b0f3..6f933f4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp @@ -17,6 +17,27 @@ #include #include #include +#include +#include + +namespace { +int getAxisIndex(kernel_selector::InterpolateAxis axis) { + switch(axis) { + case kernel_selector::InterpolateAxis::BATCH: + return 0; + case kernel_selector::InterpolateAxis::FEATURE: + return 1; + case kernel_selector::InterpolateAxis::Z: + return 2; + case kernel_selector::InterpolateAxis::Y: + return 3; + case kernel_selector::InterpolateAxis::X: + return 4; + default: + return 0; + } +} +} // namespace namespace kernel_selector { @@ -44,14 +65,16 @@ ResampleKernelBase::DispatchData ResampleKernelBase::SetDefault(const kernel_sel if (arg.resampleType == ResampleType::NEAREST_NEIGHBOR) global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; - else if (arg.resampleType == ResampleType::BILINEAR_INTERP) + else if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) global = {Align(out.X().v, 32), out.Y().v, out.Batch().v}; else if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP) global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v}; + else + global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v}; local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo); - if (arg.resampleType == ResampleType::BILINEAR_INTERP) { + if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) { local[0] = 32; local[1] = 1; local[2] = 1; @@ -102,41 +125,77 @@ JitConstants ResampleKernelBase::GetJitConstants(const resample_params& params) const auto& input = params.inputs[0]; const auto& output = params.output; const auto align_corners = params.align_corners; - const auto pad_begin = params.pad_begin; - const auto pad_end = params.pad_end; - const auto x_size_padded = pad_begin + input.X().v + pad_end; - const auto y_size_padded = pad_begin + input.Y().v + pad_end; - const auto z_size_padded = pad_begin + input.Z().v + pad_end; - const auto out_x_size_padded = pad_begin + output.X().v + pad_end; - const auto out_y_size_padded = pad_begin + output.Y().v + pad_end; - const auto out_z_size_padded = pad_begin + output.Z().v + pad_end; - float x_ratio = 0; - float y_ratio = 0; - float z_ratio = 0; + auto pads_begin = params.pads_begin; + auto pads_end = params.pads_end; + if (pads_begin.size() == 4) + pads_begin.insert(std::next(pads_begin.begin(), 2), 0); + if (pads_end.size() == 4) + pads_end.insert(std::next(pads_end.begin(), 2), 0); + + const auto b_size_padded = pads_begin[0] + input.Batch().v + pads_end[0]; + const auto f_size_padded = pads_begin[1] + input.Feature().v + pads_end[1]; + const auto x_size_padded = pads_begin[4] + input.X().v + pads_end[4]; + const auto y_size_padded = pads_begin[3] + input.Y().v + pads_end[3]; + const auto z_size_padded = pads_begin[2] + input.Z().v + pads_end[2]; + const auto out_b_size_padded = output.Batch().v; + const auto out_f_size_padded = output.Feature().v; + const auto out_x_size_padded = output.X().v; + const auto out_y_size_padded = output.Y().v; + const auto out_z_size_padded = output.Z().v; + std::vector scales(5); + std::vector axesUsed(5, 0); + bool paddingUsed = false; + for (size_t i = 0; i < pads_begin.size(); ++i) { + paddingUsed |= (pads_begin[i] != 0 || pads_end[i] != 0); + } if (align_corners) { - x_ratio = (out_x_size_padded) > 1 ? static_cast(x_size_padded - 1) / static_cast(out_x_size_padded - 1) : 0.0f; - y_ratio = (out_y_size_padded) > 1 ? static_cast(y_size_padded - 1) / static_cast(out_y_size_padded - 1) : 0.0f; - z_ratio = (out_z_size_padded) > 1 ? static_cast(z_size_padded - 1) / static_cast(out_z_size_padded - 1) : 0.0f; + scales[0] = (out_b_size_padded) > 1 + ? static_cast(b_size_padded - 1) / static_cast(out_b_size_padded - 1) + : 0.0f; + scales[1] = (out_f_size_padded) > 1 + ? static_cast(f_size_padded - 1) / static_cast(out_f_size_padded - 1) + : 0.0f; + scales[4] = (out_x_size_padded) > 1 + ? static_cast(x_size_padded - 1) / static_cast(out_x_size_padded - 1) + : 0.0f; + scales[3] = (out_y_size_padded) > 1 + ? static_cast(y_size_padded - 1) / static_cast(out_y_size_padded - 1) + : 0.0f; + scales[2] = (out_z_size_padded) > 1 + ? static_cast(z_size_padded - 1) / static_cast(out_z_size_padded - 1) + : 0.0f; } else { - x_ratio = static_cast(x_size_padded) / static_cast(out_x_size_padded); - y_ratio = static_cast(y_size_padded) / static_cast(out_y_size_padded); - z_ratio = static_cast(z_size_padded) / static_cast(out_z_size_padded); + scales[0] = static_cast(b_size_padded) / static_cast(out_b_size_padded); + scales[1] = static_cast(f_size_padded) / static_cast(out_f_size_padded); + scales[4] = static_cast(x_size_padded) / static_cast(out_x_size_padded); + scales[3] = static_cast(y_size_padded) / static_cast(out_y_size_padded); + scales[2] = static_cast(z_size_padded) / static_cast(out_z_size_padded); + } + for (const auto& it : params.axesAndScales) { + int idx = getAxisIndex(it.first); + axesUsed[idx] = 1; + if (params.shapeCalculationMode == kernel_selector::ShapeCalculationMode::SCALES) + scales[idx] = 1.f / it.second; + } + for (size_t i = 0; i < scales.size(); ++i) { + if (scales[i] != 1.f) + axesUsed[i] = 1; } jit.AddConstants({ MakeJitConstant(toString(params.resampleType), ""), - MakeJitConstant("X_RATIO", x_ratio), - MakeJitConstant("Y_RATIO", y_ratio), - MakeJitConstant("Z_RATIO", z_ratio), - MakeJitConstant("X_RATIO_HALF", x_ratio / 2.0f), - MakeJitConstant("Y_RATIO_HALF", y_ratio / 2.0f), - MakeJitConstant("Z_RATIO_HALF", z_ratio / 2.0f), - MakeJitConstant("PAD_BEGIN", pad_begin), - MakeJitConstant("PAD_END", pad_end), + MakeJitConstant(toString(params.nearestMode), ""), + MakeJitConstant(toString(params.coordTransMode), ""), + MakeJitConstant("SCALES", scales), + MakeJitConstant("PADS_BEGIN", pads_begin), + MakeJitConstant("PADS_END", pads_end), + MakeJitConstant("PADDING_USED", (int)paddingUsed), + MakeJitConstant("AXES_USED", axesUsed), MakeJitConstant("ALIGN_CORNERS", align_corners), MakeJitConstant("KERNEL_W", 2), - MakeJitConstant("ANTIALIAS", 0) + MakeJitConstant("ANTIALIAS", params.antialias), + MakeJitConstant("CUBE_COEFF", params.cube_coeff), }); size_t feature_block_size = GetFeatureBlockSize(params); @@ -149,7 +208,7 @@ JitConstants ResampleKernelBase::GetJitConstants(const resample_params& params) } } - if (params.resampleType == ResampleType::BILINEAR_INTERP) { + if (params.resampleType == ResampleType::BILINEAR_INTERP || params.resampleType == ResampleType::LINEAR_ONNX) { if (params.output.X().v % 32 != 0) { jit.AddConstant(MakeJitConstant("LEFTOVERS", 1)); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.h index f2a3c31..bc4e34a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.h @@ -16,6 +16,8 @@ #include "common_kernel_base.h" +#include + namespace kernel_selector { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // resample_params @@ -23,10 +25,17 @@ namespace kernel_selector { struct resample_params : public base_params { resample_params() : base_params(KernelType::RESAMPLE) {} - uint32_t pad_begin = 0; - uint32_t pad_end = 0; + std::vector pads_begin = {}; + std::vector pads_end = {}; uint32_t align_corners = 0; ResampleType resampleType = ResampleType::NEAREST_NEIGHBOR; + CoordinateTransformationMode coordTransMode = CoordinateTransformationMode::HALF_PIXEL; + NearestMode nearestMode = NearestMode::ROUND_PREFER_FLOOR; + ShapeCalculationMode shapeCalculationMode = ShapeCalculationMode::SIZES; + uint32_t antialias = 0; + float cube_coeff = -0.75f; + using AxesAndScales = std::map; + AxesAndScales axesAndScales; virtual ParamsKey GetParamsKey() const { auto k = base_params::GetParamsKey(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp index d7d7484..90069a7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp @@ -40,6 +40,7 @@ ParamsKey ResampleKernelRef::GetSupportedKey() const { k.EnableReampleType(ResampleType::NEAREST_NEIGHBOR); k.EnableReampleType(ResampleType::CAFFE_BILINEAR_INTERP); k.EnableReampleType(ResampleType::BILINEAR_INTERP); + k.EnableReampleType(ResampleType::CUBIC); return k; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_opt.cl index a870286..2824c19 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_opt.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_opt.cl @@ -52,13 +52,13 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input, unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) { #ifdef SAMPLE_TYPE_NEAREST - const int ix = floor((x + out_x) * X_RATIO); - const int iy = floor(y * Y_RATIO); + const int ix = floor((x + out_x) * SCALES[4]); + const int iy = floor(y * SCALES[3]); in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iy, ix)); #else - const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(X_RATIO) * (x + out_x); - const ACCUMULATOR_TYPE iy = TO_ACCUMULATOR_TYPE(Y_RATIO) * y; + const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(SCALES[4]) * (x + out_x); + const ACCUMULATOR_TYPE iy = TO_ACCUMULATOR_TYPE(SCALES[3]) * y; const int top_y_index = (int)(floor(iy)); const int bottom_y_index = min((int)ceil(iy), INPUT0_SIZE_Y - 1); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_ref.cl index a7372ed..f20ce3d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_ref.cl @@ -38,6 +38,48 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint z, uint y, uint x) #endif } +inline int FUNC(get_nearest_val)(float num, bool is_downsample) +{ +#if defined(NEAREST_ROUND_PREFER_FLOOR) + return (num == (int)num + 0.5f) ? (int)floor(num) : (int)round(num); +#elif defined(NEAREST_ROUND_PREFER_CEIL) + return (int)round(num); +#elif defined(NEAREST_FLOOR) + return (int)floor(num); +#elif defined(NEAREST_CEIL) + return (int)ceil(num); +#elif defined(NEAREST_SIMPLE) + return is_downsample ? (int)ceil(num) : (int)num; +#else +#error [clDNN resample_ref.cl]: nearest mode - not supported +#endif +} + +inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original) +{ +#if defined(COORD_TRANS_MODE_HALF_PIXEL) + return (num + 0.5f) * scale - 0.5f; +#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL) + return (length_resized > 1) ? (num + 0.5f) * scale - 0.5f : 0.f; +#elif defined(COORD_TRANS_MODE_ASYMMETRIC) + return num * scale; +#elif defined(COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN) + return (num + 0.5f) * scale; +#elif defined(COORD_TRANS_MODE_ALIGN_CORNERS) + return (length_resized != 1) ? num * (length_original - 1) / (length_resized - 1) : 0.f; +#else +#error [clDNN resample_ref.cl]: coordinate transformation mode - not supported +#endif +} + +inline void FUNC(get_cubic_coeff)(float* cubic_coef, float coord, float coef) +{ + float abs_num = fabs(coord); + cubic_coef[0] = coef * (abs_num - 1.0) * (abs_num - 1.0) * abs_num; + cubic_coef[1] = ((coef + 2.0) * abs_num - (coef + 3.0)) * abs_num * abs_num + 1.0; + cubic_coef[2] = (((-coef - 2.0) * abs_num + (2.0 * coef + 3.0)) * abs_num - coef) * abs_num; + cubic_coef[3] = -coef * abs_num * abs_num * (abs_num - 1.0); +} #define TRIANGLE_COEFF(x) (ACCUMULATOR_MAX_FUNC(ACCUMULATOR_VAL_ZERO, ACCUMULATOR_VAL_ONE - ACCUMULATOR_ABS_FUNC(x))) #define unroll_for __attribute__((opencl_unroll_hint)) for @@ -49,73 +91,223 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input, #endif ) { + const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X }; + const int out_size[5] = { OUTPUT_BATCH_NUM, OUTPUT_FEATURE_NUM, OUTPUT_SIZE_Z, OUTPUT_SIZE_Y, OUTPUT_SIZE_X }; #if defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE typedef MAKE_VECTOR_TYPE(INPUT0_TYPE, PACK_SIZE) in_pack_t; typedef MAKE_VECTOR_TYPE(OUTPUT_TYPE, PACK_SIZE) out_pack_t; - const int ox = get_global_id(0); + int out_coords[5]; + out_coords[4] = get_global_id(0); #if OUTPUT_DIMS <= 4 - const int oy = get_global_id(1); - const int oz = 0; -#else - const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y; - const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y; + out_coords[3] = get_global_id(1); + out_coords[2] = 0; +#else // OUTPUT_DIMS <= 4 + out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y; + out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y; +#endif // OUTPUT_DIMS <= 4 + out_coords[1] = ((int)get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM; + out_coords[0] = ((int)get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM; + int in_coords[5]; + bool isOutOfBounds = false; + unroll_for (int i = 0; i < 5; ++i) { + const float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]); + const int nearest_pixel = FUNC_CALL(get_nearest_val)(orig_coord, SCALES[i] > 1) - PADS_BEGIN[i]; + in_coords[i] = max(-PADS_BEGIN[0], min(nearest_pixel, in_size[i] + PADS_END[i] - 1)); +#if PADDING_USED == 1 + if (in_coords[i] < 0 || in_coords[i] >= in_size[i]) + isOutOfBounds = true; #endif - const int feature = ((int)get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM; - const int batch = ((int)get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM; - const int ix = floor(ox * X_RATIO); - const int iy = floor(oy * Y_RATIO); - const int iz = floor(oz * Z_RATIO); + } - uint input_idx = FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix); - uint output_idx = FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox); + uint input_idx = FUNC_CALL(get_input_index)(in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4]); + uint output_idx = FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4]); in_pack_t interp_val_pack = ((const __global in_pack_t*)(input + input_idx))[0]; out_pack_t res; unroll_for (uint pi = 0; pi < PACK_SIZE; ++pi) { INPUT0_TYPE interp_val = interp_val_pack[pi]; +#if PADDING_USED == 1 + if (isOutOfBounds) + interp_val = INPUT0_VAL_ZERO; +#endif #if HAS_FUSED_OPS - #define OF_ID (feature + pi) + #define OF_ID (out_coords[1] + pi) FUSED_OPS; res[pi] = FUSED_OPS_RESULT; - #else + #else // HAS_FUSED_OPS res[pi] = ACTIVATION(interp_val, ACTIVATION_PARAMS); - #endif + #endif // HAS_FUSED_OPS } ((__global out_pack_t*)(output + output_idx))[0] = res; -#elif defined(SAMPLE_TYPE_NEAREST) - const int ox = get_global_id(0); +#elif defined(SAMPLE_TYPE_NEAREST) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE + int out_coords[5]; + out_coords[4] = get_global_id(0); #if OUTPUT_DIMS <= 4 - const int oy = get_global_id(1); - const int oz = 0; -#else - const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y; - const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y; + out_coords[3] = get_global_id(1); + out_coords[2] = 0; +#else // OUTPUT_DIMS <= 4 + out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y; + out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y; +#endif // OUTPUT_DIMS <= 4 + out_coords[1] = (int)get_global_id(2) % OUTPUT_FEATURE_NUM; + out_coords[0] = (int)get_global_id(2) / OUTPUT_FEATURE_NUM; + int in_coords[5]; + bool isOutOfBounds = false; + unroll_for (int i = 0; i < 5; ++i) { + const float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]); + int nearest_pixel = FUNC_CALL(get_nearest_val)(orig_coord, SCALES[i] > 1) - PADS_BEGIN[i]; + in_coords[i] = max(-PADS_BEGIN[i], min(nearest_pixel, in_size[i] + PADS_END[i] - 1)); +#if PADDING_USED == 1 + if (in_coords[i] < 0 || in_coords[i] >= in_size[i]) + isOutOfBounds = true; +#endif + } + INPUT0_TYPE interp_val = input[FUNC_CALL(get_input_index)(in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4])]; +#if PADDING_USED == 1 + if (isOutOfBounds) + interp_val = INPUT0_VAL_ZERO; #endif - const int feature = (int)get_global_id(2) % OUTPUT_FEATURE_NUM; - const int batch = (int)get_global_id(2) / OUTPUT_FEATURE_NUM; - const int ix = floor(ox * X_RATIO); - const int iy = floor(oy * Y_RATIO); - const int iz = floor(oz * Z_RATIO); - - INPUT0_TYPE interp_val = input[FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix)]; #if HAS_FUSED_OPS - #define OF_ID (feature) + #define OF_ID (out_coords[1]) FUSED_OPS; OUTPUT_TYPE res = FUSED_OPS_RESULT; -#else +#else // HAS_FUSED_OPS OUTPUT_TYPE res = ACTIVATION(interp_val, ACTIVATION_PARAMS); +#endif // HAS_FUSED_OPS + output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res; +#elif defined(SAMPLE_TYPE_CUBIC) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE + int out_coords[5]; + out_coords[4] = get_global_id(0); +#if OUTPUT_DIMS <= 4 + out_coords[3] = get_global_id(1); + out_coords[2] = 0; +#else // OUTPUT_DIMS <= 4 + out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y; + out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y; +#endif // OUTPUT_DIMS <= 4 + out_coords[1] = (int)get_global_id(2) % OUTPUT_FEATURE_NUM; + out_coords[0] = (int)get_global_id(2) / OUTPUT_FEATURE_NUM; + int in_coords[5]; + float cubic_coeff[5][4]; + unroll_for (int i = 0; i < 5; ++i) { + float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]) - PADS_BEGIN[i]; + in_coords[i] = floor(orig_coord); + orig_coord = (orig_coord - in_coords[i]) * AXES_USED[i]; + FUNC_CALL(get_cubic_coeff)(cubic_coeff[i], orig_coord, CUBE_COEFF); + } + + INPUT0_TYPE interp_val = INPUT0_VAL_ZERO; + int index[5]; + unroll_for (index[0] = 0; index[0] <= 3; ++index[0]) { + unroll_for (index[1] = 0; index[1] <= 3; ++index[1]) { + unroll_for (index[2] = 0; index[2] <= 3; ++index[2]) { + unroll_for (index[3] = 0; index[3] <= 3; ++index[3]) { + unroll_for (index[4] = 0; index[4] <= 3; ++index[4]) { + int coords_sum[5] = { in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4] }; + float coeff_prod = 1.0f; + bool isOutOfBounds = false; + unroll_for (int i = 0; i < 5; ++i) { + coords_sum[i] = max(-PADS_BEGIN[i], min(in_coords[i] + index[i] - 1, PADS_END[i] + in_size[i] - 1)); +#if PADDING_USED == 1 + if (coords_sum[i] < 0 || coords_sum[i] >= in_size[i]) + isOutOfBounds = true; +#endif + coeff_prod *= cubic_coeff[i][index[i]]; + } +#if PADDING_USED == 1 + if (!isOutOfBounds) +#endif + interp_val += coeff_prod * input[FUNC_CALL(get_input_index)(coords_sum[0], coords_sum[1], coords_sum[2], coords_sum[3], coords_sum[4])]; + } + } + } + } + } + +#if HAS_FUSED_OPS + #define OF_ID (out_coords[1]) + FUSED_OPS; + OUTPUT_TYPE res = FUSED_OPS_RESULT; +#else // HAS_FUSED_OPS + OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS); +#endif // HAS_FUSED_OPS + output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res; +#elif defined(SAMPLE_TYPE_LINEAR_ONNX) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE + const int ox = get_global_id(0); + const int oy = get_global_id(1); + const int feature = 0; + const int batch = get_global_id(2); + const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3]; + const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4]; + const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X); + const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y); + +#ifdef LEFTOVERS + if (ox >= OUTPUT_SIZE_X) + return; #endif - output[FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox)] = res; -#elif defined(SAMPLE_TYPE_INTERP) + float in_y = fmax(0, fmin(iy, PADDED_Y - 1)); + float in_x = fmax(0, fmin(ix, PADDED_X - 1)); + int in_y1 = min((int)in_y, PADDED_Y - 1); + int in_y2 = min(in_y1 + 1, PADDED_Y - 1); + int in_x1 = min((int)in_x, PADDED_X - 1); + int in_x2 = min(in_x1 + 1, PADDED_X - 1); + + const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f; + const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f; + const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f; + const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f; +#if PADDING_USED == 1 + in_y1 -= PADS_BEGIN[3]; + in_y2 -= PADS_BEGIN[3]; + in_x1 -= PADS_BEGIN[4]; + in_x2 -= PADS_BEGIN[4]; + + bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4]; + bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4]; + bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4]; + bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4]; +#endif + unroll_for(int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) { + INPUT0_TYPE top_left = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x1)]; + INPUT0_TYPE top_right = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x2)]; + INPUT0_TYPE bottom_left = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x1)]; + INPUT0_TYPE bottom_right = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x2)]; +#if PADDING_USED == 1 + if (tlOutOfBounds) + top_left = INPUT0_VAL_ZERO; + if (trOutOfBounds) + top_right = INPUT0_VAL_ZERO; + if (blOutOfBounds) + bottom_left = INPUT0_VAL_ZERO; + if (brOutOfBounds) + bottom_right = INPUT0_VAL_ZERO; +#endif + + ACCUMULATOR_TYPE interp_val = TO_ACCUMULATOR_TYPE(dx2 * dy2 * top_left) + + TO_ACCUMULATOR_TYPE(dx1 * dy2 * top_right) + + TO_ACCUMULATOR_TYPE(dx2 * dy1 * bottom_left) + + TO_ACCUMULATOR_TYPE(dx1 * dy1 * bottom_right); + +#if HAS_FUSED_OPS + #define OF_ID (in_f) + FUSED_OPS; + OUTPUT_TYPE res = FUSED_OPS_RESULT; +#else + OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS); +#endif + output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res; + } +#elif defined(SAMPLE_TYPE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE const int ox = get_global_id(0); const int oy = get_global_id(1); const int feature = 0; const int batch = get_global_id(2); - const float ix = X_RATIO * ox; - const float iy = Y_RATIO * oy; + const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], OUTPUT_SIZE_X, in_size[4]); + const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], OUTPUT_SIZE_Y, in_size[3]); #ifdef LEFTOVERS if (ox >= OUTPUT_SIZE_X) @@ -123,9 +315,9 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input, #endif const int top_y_index = (int)(floor(iy)); - const int bottom_y_index = min((int)ceil(iy), INPUT0_SIZE_Y - 1); + const int bottom_y_index = min((int)ceil(iy), in_size[3] - 1); const int left_x_index = (int)(floor(ix)); - const int right_x_index = min((int)ceil(ix), INPUT0_SIZE_X - 1); + const int right_x_index = min((int)ceil(ix), in_size[4] - 1); const ACCUMULATOR_TYPE dx = TO_ACCUMULATOR_TYPE(ix - left_x_index); const ACCUMULATOR_TYPE dy = TO_ACCUMULATOR_TYPE(iy - top_y_index); @@ -146,11 +338,11 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input, FUSED_OPS; OUTPUT_TYPE res = FUSED_OPS_RESULT; #else - OUTPUT_TYPE res = TO_OUTPUT_TYPE(ACTIVATION(interp_val, ACTIVATION_PARAMS)); + OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS); #endif output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res; } -#elif defined(SAMPLE_TYPE_CAFFE_INTERP) +#elif defined(SAMPLE_TYPE_CAFFE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE const int ox = (int)get_global_id(0) % OUTPUT_SIZE_X; const int oy = (int)get_global_id(0) / OUTPUT_SIZE_X; const int feature_block_nun = get_global_id(1); @@ -162,84 +354,123 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input, const int batch = (int)get_global_id(2) % OUTPUT_BATCH_NUM; const int oz = (int)get_global_id(2) / OUTPUT_BATCH_NUM; #endif + const int PADDED_B = in_size[0] + PADS_BEGIN[0] + PADS_END[0]; + const int PADDED_F = in_size[1] + PADS_BEGIN[1] + PADS_END[1]; + const int PADDED_Z = in_size[2] + PADS_BEGIN[2] + PADS_END[2]; + const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3]; + const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4]; - const ACCUMULATOR_TYPE ix = ox * X_RATIO + X_RATIO_HALF - 0.5f; - const ACCUMULATOR_TYPE iy = oy * Y_RATIO + Y_RATIO_HALF - 0.5f; - const ACCUMULATOR_TYPE iz = oz * Z_RATIO + Z_RATIO_HALF - 0.5f; + ACCUMULATOR_TYPE i_b = AXES_USED[0] ? FUNC_CALL(get_original_coordinate)(batch, SCALES[0], out_size[0], PADDED_B) : batch; + ACCUMULATOR_TYPE i_f = AXES_USED[1] ? FUNC_CALL(get_original_coordinate)(feature, SCALES[1], out_size[1], PADDED_F) : feature; + ACCUMULATOR_TYPE i_x = AXES_USED[4] ? FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X) : ox; + ACCUMULATOR_TYPE i_y = AXES_USED[3] ? FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y) : oy; + ACCUMULATOR_TYPE i_z = AXES_USED[2] ? FUNC_CALL(get_original_coordinate)(oz, SCALES[2], out_size[2], PADDED_Z) : oz; +#if PADDING_USED == 1 + i_b -= PADS_BEGIN[0]; + i_f -= PADS_BEGIN[1]; + i_z -= PADS_BEGIN[2]; + i_y -= PADS_BEGIN[3]; + i_x -= PADS_BEGIN[4]; +#endif - const int ix_r = (int)ix; - const int iy_r = (int)iy; - const int iz_r = (int)iz; + const int ib_r = (int)i_b; + const int if_r = (int)i_f; + const int ix_r = (int)i_x; + const int iy_r = (int)i_y; + const int iz_r = (int)i_z; #if ANTIALIAS == 1 - const ACCUMULATOR_TYPE ax = 1.0f / X_RATIO; - const ACCUMULATOR_TYPE ay = 1.0f / Y_RATIO; - const ACCUMULATOR_TYPE az = 1.0f / Z_RATIO; + const ACCUMULATOR_TYPE ab = 1.0f / SCALES[0]; + const ACCUMULATOR_TYPE af = 1.0f / SCALES[1]; + const ACCUMULATOR_TYPE ax = 1.0f / SCALES[4]; + const ACCUMULATOR_TYPE ay = 1.0f / SCALES[3]; + const ACCUMULATOR_TYPE az = 1.0f / SCALES[2]; #else + const ACCUMULATOR_TYPE ab = 1.0f; + const ACCUMULATOR_TYPE af = 1.0f; const ACCUMULATOR_TYPE ax = 1.0f; const ACCUMULATOR_TYPE ay = 1.0f; const ACCUMULATOR_TYPE az = 1.0f; #endif - const int rx = (X_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ax); - const int ry = (Y_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ay); - const int rz = (Z_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / az); - - ACCUMULATOR_TYPE sum[FEATURE_BLOCK_SIZE]; - for (int i = 0; i < FEATURE_BLOCK_SIZE; i++) - sum[i] = 0; - - ACCUMULATOR_TYPE wsum = 0; - - int const y_init = max(0, iy_r - ry); - int const x_init = max(0, ix_r - rx); - int const z_init = max(0, iz_r - rz); - int const y_max = min(INPUT0_SIZE_Y, iy_r + ry + 1); - int const x_max = min(INPUT0_SIZE_X, ix_r + rx + 1); - int const z_max = min(INPUT0_SIZE_Z, iz_r + rz + 1); - - unroll_for(int z = z_init; z < z_max; z++) { - unroll_for(int y = y_init; y < y_max; y++) { - unroll_for(int x = x_init; x < x_max; x++) { - ACCUMULATOR_TYPE dx = ix - x; - ACCUMULATOR_TYPE dy = iy - y; - ACCUMULATOR_TYPE dz = iz - z; -#if ANTIALIAS == 1 - ACCUMULATOR_TYPE w = ax * TRIANGLE_COEFF(ax * dx) * ay * TRIANGLE_COEFF(ay * dy) * az * triangleCoeff(az * dz); + const int rb = (SCALES[0] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ab); + const int rf = (SCALES[1] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / af); + const int rx = (SCALES[4] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ax); + const int ry = (SCALES[3] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ay); + const int rz = (SCALES[2] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / az); + + int const b_init = max(-PADS_BEGIN[0], ib_r - rb); + int const f_init = max(-PADS_BEGIN[1], if_r - rf); + int const y_init = max(-PADS_BEGIN[3], iy_r - ry); + int const x_init = max(-PADS_BEGIN[4], ix_r - rx); + int const z_init = max(-PADS_BEGIN[2], iz_r - rz); + int const b_max = min(PADS_END[0] + INPUT0_BATCH_NUM, ib_r + rb + 1); + int const f_max = min(PADS_END[1] + INPUT0_FEATURE_NUM, if_r + rf + 1); + int const y_max = min(PADS_END[3] + INPUT0_SIZE_Y, iy_r + ry + 1); + int const x_max = min(PADS_END[4] + INPUT0_SIZE_X, ix_r + rx + 1); + int const z_max = min(PADS_END[2] + INPUT0_SIZE_Z, iz_r + rz + 1); +#ifndef LEFTOVERS + const int fp_max = FEATURE_BLOCK_SIZE; #else - ACCUMULATOR_TYPE w = TRIANGLE_COEFF(dx) * TRIANGLE_COEFF(dy) * TRIANGLE_COEFF(dz); + const int fp_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER); #endif + ACCUMULATOR_TYPE sum[fp_max] = {0}; + ACCUMULATOR_TYPE wsum[fp_max] = {0}; -#ifndef LEFTOVERS - unroll_for(int f = 0; f < FEATURE_BLOCK_SIZE; f++) { + unroll_for(int b = b_init; b < b_max; b++) { + unroll_for(int f = f_init; f < f_max; f++) { + unroll_for(int z = z_init; z < z_max; z++) { + unroll_for(int y = y_init; y < y_max; y++) { + unroll_for(int x = x_init; x < x_max; x++) { + unroll_for(int fp = 0; fp < fp_max; fp++) { +#if PADDING_USED == 1 + bool isOutOfBounds = b < 0 || f < 0 || z < 0 || y < 0 || x < 0 || + b >= in_size[0] || f >= in_size[1] || z >= in_size[2] || + y >= in_size[3] || x >= in_size[4]; +#endif + + ACCUMULATOR_TYPE db = i_b - b; + ACCUMULATOR_TYPE df = i_f - f; + ACCUMULATOR_TYPE dx = i_x - x; + ACCUMULATOR_TYPE dy = i_y - y; + ACCUMULATOR_TYPE dz = i_z - z; +#if ANTIALIAS == 1 + ACCUMULATOR_TYPE w = ab * TRIANGLE_COEFF(ab * db) * + af * TRIANGLE_COEFF(af * df) * + ax * TRIANGLE_COEFF(ax * dx) * + ay * TRIANGLE_COEFF(ay * dy) * + az * TRIANGLE_COEFF(az * dz); #else - const int f_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER); - unroll_for(int f = 0; f < f_max; f++) { + ACCUMULATOR_TYPE w = TRIANGLE_COEFF(db) * + TRIANGLE_COEFF(df) * + TRIANGLE_COEFF(dx) * + TRIANGLE_COEFF(dy) * + TRIANGLE_COEFF(dz); #endif - if (w != 0) - sum[f] += w * TO_ACCUMULATOR_TYPE(input[FUNC_CALL(get_input_index)(batch, feature + f, z, y, x)]); + if (w != 0 && f + fp < INPUT0_FEATURE_NUM) { + wsum[fp] += w; +#if PADDING_USED == 1 + if (!isOutOfBounds) +#endif + sum[fp] += w * TO_ACCUMULATOR_TYPE(input[FUNC_CALL(get_input_index)(b, f + fp, z, y, x)]); + } + } + } } - wsum += w; } } } -#ifndef LEFTOVERS - unroll_for (int f = 0; f < FEATURE_BLOCK_SIZE; f++) { -#else - const int f_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER); - unroll_for (int f = 0; f < f_max; f++) { -#endif - - ACCUMULATOR_TYPE interp_val = (wsum == 0) ? 0 : (sum[f] / wsum); + unroll_for (int f = 0; f < fp_max; f++) { + ACCUMULATOR_TYPE interp_val = (wsum[f] == 0) ? ACCUMULATOR_VAL_ZERO : (sum[f] / wsum[f]); #if HAS_FUSED_OPS #define OF_ID (feature + f) FUSED_OPS; OUTPUT_TYPE res = FUSED_OPS_RESULT; #else - OUTPUT_TYPE res = TO_OUTPUT_TYPE(ACTIVATION(interp_val, ACTIVATION_PARAMS)); + OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS); #endif output[FUNC_CALL(get_output_index)(batch, feature + f, oz, oy, ox)] = res; } -#endif +#endif // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE } #undef unroll_for diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp index e9f97ae..95bac97 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp @@ -429,6 +429,30 @@ std::string toString(ResampleType type) { case ResampleType::NEAREST_NEIGHBOR: return "SAMPLE_TYPE_NEAREST"; case ResampleType::BILINEAR_INTERP: return "SAMPLE_TYPE_INTERP"; case ResampleType::CAFFE_BILINEAR_INTERP: return "SAMPLE_TYPE_CAFFE_INTERP"; + case ResampleType::CUBIC: return "SAMPLE_TYPE_CUBIC"; + case ResampleType::LINEAR_ONNX: return "SAMPLE_TYPE_LINEAR_ONNX"; + default: return ""; + } +} + +std::string toString(CoordinateTransformationMode mode) { + switch (mode) { + case CoordinateTransformationMode::HALF_PIXEL: return "COORD_TRANS_MODE_HALF_PIXEL"; + case CoordinateTransformationMode::PYTORCH_HALF_PIXEL: return "COORD_TRANS_MODE_PYTORCH_HALF_PIXEL"; + case CoordinateTransformationMode::ASYMMETRIC: return "COORD_TRANS_MODE_ASYMMETRIC"; + case CoordinateTransformationMode::TF_HALF_PIXEL_FOR_NN: return "COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN"; + case CoordinateTransformationMode::ALIGN_CORNERS: return "COORD_TRANS_MODE_ALIGN_CORNERS"; + default: return ""; + } +} + +std::string toString(NearestMode mode) { + switch (mode) { + case NearestMode::ROUND_PREFER_FLOOR: return "NEAREST_ROUND_PREFER_FLOOR"; + case NearestMode::ROUND_PREFER_CEIL: return "NEAREST_ROUND_PREFER_CEIL"; + case NearestMode::FLOOR: return "NEAREST_FLOOR"; + case NearestMode::CEIL: return "NEAREST_CEIL"; + case NearestMode::SIMPLE: return "NEAREST_SIMPLE"; default: return ""; } } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h index 8c97e0e..ca781c3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h @@ -251,6 +251,8 @@ std::string toString(TileAxis a); std::string toString(GatherAxis a); std::string toString(ScatterUpdateAxis a); std::string toString(ResampleType type); +std::string toString(CoordinateTransformationMode mode); +std::string toString(NearestMode mode); std::string toString(const BorderType type); std::string toString(const Tensor::Dim& dim); std::string toString(const DataTensor& tensor); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp index 7409bd4..be26f9c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp @@ -34,10 +34,78 @@ inline kernel_selector::sample_type convert_to_sample_type(resample_type type) { return kernel_selector::sample_type::CAFFE_BILINEAR_INTERP; case resample_type::bilinear: return kernel_selector::sample_type::BILINEAR_INTERP; + case resample_type::cubic: + return kernel_selector::sample_type::CUBIC; + case resample_type::linear_onnx: + return kernel_selector::sample_type::LINEAR_ONNX; default: return kernel_selector::sample_type::NEAREST_NEIGHBOR; } } + +inline kernel_selector::coordinate_transformation_mode convert_to_coord_transform_mode(coordinate_transformation_mode mode) { + switch (mode) { + case coordinate_transformation_mode::half_pixel: + return kernel_selector::coordinate_transformation_mode::HALF_PIXEL; + case coordinate_transformation_mode::pytorch_half_pixel: + return kernel_selector::coordinate_transformation_mode::PYTORCH_HALF_PIXEL; + case coordinate_transformation_mode::asymmetric: + return kernel_selector::coordinate_transformation_mode::ASYMMETRIC; + case coordinate_transformation_mode::tf_half_pixel_for_nn: + return kernel_selector::coordinate_transformation_mode::TF_HALF_PIXEL_FOR_NN; + case coordinate_transformation_mode::align_corners: + return kernel_selector::coordinate_transformation_mode::ALIGN_CORNERS; + default: + return kernel_selector::coordinate_transformation_mode::HALF_PIXEL; + } +} + +inline kernel_selector::nearest_mode convert_to_nearest_mode(nearest_mode mode) { + switch (mode) { + case nearest_mode::round_prefer_floor: + return kernel_selector::nearest_mode::ROUND_PREFER_FLOOR; + case nearest_mode::round_prefer_ceil: + return kernel_selector::nearest_mode::ROUND_PREFER_CEIL; + case nearest_mode::floor: + return kernel_selector::nearest_mode::FLOOR; + case nearest_mode::ceil: + return kernel_selector::nearest_mode::CEIL; + case nearest_mode::simple: + return kernel_selector::nearest_mode::SIMPLE; + default: + return kernel_selector::nearest_mode::ROUND_PREFER_FLOOR; + } +} + +inline kernel_selector::shape_calculation_mode convert_to_shape_calculation_mode(shape_calculation_mode mode) { + switch (mode) { + case shape_calculation_mode::sizes: + return kernel_selector::shape_calculation_mode::SIZES; + case shape_calculation_mode::scales: + return kernel_selector::shape_calculation_mode::SCALES; + default: + return kernel_selector::shape_calculation_mode::SIZES; + } +} + +inline kernel_selector::interpolate_axis convert_axis(resample::resample_axis axis) { + switch (axis) { + case resample::along_x: + return kernel_selector::interpolate_axis::X; + case resample::along_y: + return kernel_selector::interpolate_axis::Y; + case resample::along_z: + return kernel_selector::interpolate_axis::Z; + case resample::along_w: + return kernel_selector::interpolate_axis::W; + case resample::along_f: + return kernel_selector::interpolate_axis::FEATURE; + case resample::along_b: + return kernel_selector::interpolate_axis::BATCH; + default: + return kernel_selector::interpolate_axis::BATCH; + } +} } // namespace struct resample_gpu : typed_primitive_gpu_impl { @@ -53,11 +121,20 @@ struct resample_gpu : typed_primitive_gpu_impl { if (primitive->with_activation) convert_activation_func_params(primitive, us_params.activations); + size_t dimsNum = arg.get_output_layout().format.dimension(); us_params.resampleType = convert_to_sample_type(primitive->operation_type); + us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode); + us_params.coordTransMode = convert_to_coord_transform_mode(primitive->coord_trans_mode); + us_params.shapeCalculationMode = convert_to_shape_calculation_mode(primitive->shape_calc_mode); + us_params.antialias = primitive->antialias; + us_params.cube_coeff = primitive->cube_coeff; + us_params.pads_begin = primitive->pads_begin.empty() ? std::vector(dimsNum, 0) : primitive->pads_begin; + us_params.pads_end = primitive->pads_end.empty() ? std::vector(dimsNum, 0) : primitive->pads_end; + for (const auto& it : primitive->axesAndScales) { + us_params.axesAndScales[convert_axis(it.first)] = it.second; + } if (primitive->operation_type == resample_type::bilinear) { - us_params.pad_begin = primitive->pad_begin; - us_params.pad_end = primitive->pad_end; us_params.align_corners = primitive->align_corners; } diff --git a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h index d4fa358..fc1a657 100644 --- a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h +++ b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h @@ -76,6 +76,10 @@ using concat_axis = kernel_selector::ConcatAxis; using tile_axis = kernel_selector::TileAxis; using tuning_mode = kernel_selector::TuningMode; using sample_type = kernel_selector::ResampleType; +using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode; +using nearest_mode = kernel_selector::NearestMode; +using shape_calculation_mode = kernel_selector::ShapeCalculationMode; +using interpolate_axis = kernel_selector::InterpolateAxis; using border_type = kernel_selector::BorderType; using gather_axis = kernel_selector::GatherAxis; using scatter_update_axis = kernel_selector::ScatterUpdateAxis; diff --git a/inference-engine/thirdparty/clDNN/src/resample.cpp b/inference-engine/thirdparty/clDNN/src/resample.cpp index ac33f7c..f626120 100644 --- a/inference-engine/thirdparty/clDNN/src/resample.cpp +++ b/inference-engine/thirdparty/clDNN/src/resample.cpp @@ -62,17 +62,67 @@ std::string resample_inst::to_string(resample_node const& node) { resample_info.add("resample_type:", "bilinear_interp"); else if (desc->operation_type == resample_type::caffe_bilinear) resample_info.add("resample_type:", "caffe_bilinear_interp"); + else if (desc->operation_type == resample_type::cubic) + resample_info.add("resample_type:", "cubic"); else resample_info.add("resample_type:", "not supported sample type"); + if (desc->shape_calc_mode == shape_calculation_mode::sizes) + resample_info.add("shape_calculation_mode:", "sizes"); + else + resample_info.add("shape_calculation_mode:", "scales"); + + if (desc->shape_calc_mode == shape_calculation_mode::scales) { + std::string axesAndScalesDump; + std::string delim = ""; + for (auto& it : desc->axesAndScales) { + axesAndScalesDump += delim; + delim = ", "; + if (it.first == resample::resample_axis::along_b) + axesAndScalesDump += "b: "; + else if (it.first == resample::resample_axis::along_f) + axesAndScalesDump += "f: "; + else if (it.first == resample::resample_axis::along_x) + axesAndScalesDump += "x: "; + else if (it.first == resample::resample_axis::along_y) + axesAndScalesDump += "y: "; + else if (it.first == resample::resample_axis::along_z) + axesAndScalesDump += "z: "; + else + axesAndScalesDump += "w: "; + axesAndScalesDump += std::to_string(it.second); + } + resample_info.add("scales:", axesAndScalesDump); + } + + if (desc->coord_trans_mode == coordinate_transformation_mode::half_pixel) + resample_info.add("coordinate_transformation_mode:", "half_pixel"); + else if (desc->coord_trans_mode == coordinate_transformation_mode::pytorch_half_pixel) + resample_info.add("coordinate_transformation_mode:", "pytorch_half_pixel"); + else if (desc->coord_trans_mode == coordinate_transformation_mode::tf_half_pixel_for_nn) + resample_info.add("coordinate_transformation_mode:", "tf_half_pixel_for_nn"); + else if (desc->coord_trans_mode == coordinate_transformation_mode::align_corners) + resample_info.add("coordinate_transformation_mode:", "align_corners"); + else + resample_info.add("coordinate_transformation_mode:", "asymmetric"); + + if (desc->round_mode == nearest_mode::round_prefer_floor) + resample_info.add("nearest_mode:", "round_prefer_floor"); + if (desc->round_mode == nearest_mode::round_prefer_ceil) + resample_info.add("nearest_mode:", "round_prefer_ceil"); + if (desc->round_mode == nearest_mode::floor) + resample_info.add("nearest_mode:", "floor"); + if (desc->round_mode == nearest_mode::ceil) + resample_info.add("nearest_mode:", "ceil"); + else + resample_info.add("nearest_mode:", "simple"); + resample_info.add("output_size", desc->output_size); resample_info.add("with activation", desc->with_activation); resample_info.add("output padding lower size", desc->output_padding.lower_size()); resample_info.add("output padding upper size", desc->output_padding.upper_size()); if (desc->operation_type == resample_type::bilinear) { - resample_info.add("pad_begin", desc->pad_begin); - resample_info.add("pad_end", desc->pad_end); resample_info.add("align_corners", desc->align_corners); } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp index f6c6dd4..cad7778 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp @@ -91,10 +91,10 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) { 12.f,12.f, 9.f, 9.f, -17.f, -17.f, }; - for (int i = 0; i < 2; ++i) { //B - for (int j = 0; j < 2; ++j) { //F - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 6; ++l) { //X + for (int i = 0; i < 2; ++i) { // B + for (int j = 0; j < 2; ++j) { // F + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6; EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])); } @@ -146,8 +146,8 @@ TEST(resample_gpu, basic_in2x3x2x2_bilinear) { 3.f, 3.25f, 3.75f, 4.f, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 4; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 4; ++l) { // X auto linear_id = l + k * 4; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F); } @@ -172,7 +172,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp) { topology topology; topology.add(input_layout("input", input.get_layout())); - topology.add(resample("upsampling", "input", output_size, 0, 0, 0, resample_type::bilinear)); + topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear)); set_values(input, { 1.f, 2.f, @@ -196,8 +196,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp) { 3.0f, 3.5f, 4.0f, 4.0f, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 4; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 4; ++l) { // X auto linear_id = l + k * 4; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F); } @@ -223,7 +223,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_f16) { topology topology; topology.add(input_layout("input", input.get_layout())); topology.add(reorder("input_to_b_fs_yx_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32)); - topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, 0, 0, 0, resample_type::bilinear)); + topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear)); topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32)); set_values(input, { @@ -254,8 +254,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_f16) { 3.0f, 3.5f, 4.0f, 4.0f, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 4; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 4; ++l) { // X auto linear_id = l + k * 4; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F); } @@ -281,7 +281,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_fsv32) { topology topology; topology.add(input_layout("input", input.get_layout())); topology.add(reorder("input_to_fs_b_yx_fsv32", "input", format::fs_b_yx_fsv32, data_types::f16)); - topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, 0, 0, 0, resample_type::bilinear)); + topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear)); topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32)); set_values(input, { @@ -312,8 +312,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_fsv32) { 3.0f, 3.5f, 4.0f, 4.0f, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 4; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 4; ++l) { // X auto linear_id = l + k * 4; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F); } @@ -339,7 +339,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_align_1) { topology topology; topology.add(input_layout("input", input.get_layout())); - topology.add(resample("upsampling", "input", output_size, 0, 0, 1, resample_type::bilinear)); + topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 1, resample_type::bilinear)); set_values(input, { 1.f, 2.f, @@ -363,8 +363,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_align_1) { 3.000000f, 3.333333f, 3.666667f, 4.000000f }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 4; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 4; ++l) { // X auto linear_id = l + k * 4; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F); } @@ -414,8 +414,8 @@ TEST(resample_gpu, nearest_asymmetric) { 3.f, 3.f, 3.f, 4.f, 4.f, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 5; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 5; ++l) { // X auto linear_id = l + k * 5; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F); } @@ -465,8 +465,8 @@ TEST(resample_gpu, nearest_asymmetric_i8) { 3, 3, 3, 4, 4, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 5; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 5; ++l) { // X auto linear_id = l + k * 5; EXPECT_EQ(answers[linear_id], output_ptr[linear_id]); } @@ -516,8 +516,8 @@ TEST(resample_gpu, bilinear_asymmetric) { 3.f, 3.f, 3.33333f, 3.66667f, 4.f, 4.f, }; - for (int k = 0; k < 4; ++k) { //Y - for (int l = 0; l < 6; ++l) { //X + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X auto linear_id = l + k * 6; EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 5e-03F) << l << " " << k; } @@ -776,3 +776,1082 @@ INSTANTIATE_TEST_CASE_P(smoke, .smoke_params(data_types::i8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16) .smoke_params(data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16) ), ); + +TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel; + nearest_mode nm = nearest_mode::ceil; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + float answers[96] = { + 0.f, 1.f, 1.f, 1.f, + 2.f, 3.f, 3.f, 3.f, + 2.f, 3.f, 3.f, 3.f, + 4.f, 5.f, 5.f, 5.f, + 4.f, 5.f, 5.f, 5.f, + 4.f, 5.f, 5.f, 5.f, + + 6.f, 7.f, 7.f, 7.f, + 8.f, 9.f, 9.f, 9.f, + 8.f, 9.f, 9.f, 9.f, + 10.f, 11.f, 11.f, 11.f, + 10.f, 11.f, 11.f, 11.f, + 10.f, 11.f, 11.f, 11.f, + + 12.f, 13.f, 13.f, 13.f, + 14.f, 15.f, 15.f, 15.f, + 14.f, 15.f, 15.f, 15.f, + 16.f, 17.f, 17.f, 17.f, + 16.f, 17.f, 17.f, 17.f, + 16.f, 17.f, 17.f, 17.f, + + 18.f, 19.f, 19.f, 19.f, + 20.f, 21.f, 21.f, 21.f, + 20.f, 21.f, 21.f, 21.f, + 22.f, 23.f, 23.f, 23.f, + 22.f, 23.f, 23.f, 23.f, + 22.f, 23.f, 23.f, 23.f, + }; + + for (int i = 0; i < 2; ++i) { // B + for (int j = 0; j < 2; ++j) { // F + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X + auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6; + EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id; + } + } + } + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel; + nearest_mode nm = nearest_mode::round_prefer_floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + float answers[96] = { + 0.f, 0.f, 1.f, 1.f, + 0.f, 0.f, 1.f, 1.f, + 2.f, 2.f, 3.f, 3.f, + 2.f, 2.f, 3.f, 3.f, + 4.f, 4.f, 5.f, 5.f, + 4.f, 4.f, 5.f, 5.f, + + 6.f, 6.f, 7.f, 7.f, + 6.f, 6.f, 7.f, 7.f, + 8.f, 8.f, 9.f, 9.f, + 8.f, 8.f, 9.f, 9.f, + 10.f, 10.f, 11.f, 11.f, + 10.f, 10.f, 11.f, 11.f, + + 12.f, 12.f, 13.f, 13.f, + 12.f, 12.f, 13.f, 13.f, + 14.f, 14.f, 15.f, 15.f, + 14.f, 14.f, 15.f, 15.f, + 16.f, 16.f, 17.f, 17.f, + 16.f, 16.f, 17.f, 17.f, + + 18.f, 18.f, 19.f, 19.f, + 18.f, 18.f, 19.f, 19.f, + 20.f, 20.f, 21.f, 21.f, + 20.f, 20.f, 21.f, 21.f, + 22.f, 22.f, 23.f, 23.f, + 22.f, 22.f, 23.f, 23.f, + }; + + for (int i = 0; i < 2; ++i) { // B + for (int j = 0; j < 2; ++j) { // F + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X + auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6; + EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id; + } + } + } + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel; + nearest_mode nm = nearest_mode::round_prefer_ceil; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + float answers[96] = { + 0.f, 0.f, 1.f, 1.f, + 0.f, 0.f, 1.f, 1.f, + 2.f, 2.f, 3.f, 3.f, + 2.f, 2.f, 3.f, 3.f, + 4.f, 4.f, 5.f, 5.f, + 4.f, 4.f, 5.f, 5.f, + + 6.f, 6.f, 7.f, 7.f, + 6.f, 6.f, 7.f, 7.f, + 8.f, 8.f, 9.f, 9.f, + 8.f, 8.f, 9.f, 9.f, + 10.f, 10.f, 11.f, 11.f, + 10.f, 10.f, 11.f, 11.f, + + 12.f, 12.f, 13.f, 13.f, + 12.f, 12.f, 13.f, 13.f, + 14.f, 14.f, 15.f, 15.f, + 14.f, 14.f, 15.f, 15.f, + 16.f, 16.f, 17.f, 17.f, + 16.f, 16.f, 17.f, 17.f, + + 18.f, 18.f, 19.f, 19.f, + 18.f, 18.f, 19.f, 19.f, + 20.f, 20.f, 21.f, 21.f, + 20.f, 20.f, 21.f, 21.f, + 22.f, 22.f, 23.f, 23.f, + 22.f, 22.f, 23.f, 23.f, + }; + + for (int i = 0; i < 2; ++i) { // B + for (int j = 0; j < 2; ++j) { // F + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X + auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6; + EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id; + } + } + } + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel; + nearest_mode nm = nearest_mode::floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + float answers[96] = { + 0.f, 0.f, 0.f, 1.f, + 0.f, 0.f, 0.f, 1.f, + 0.f, 0.f, 0.f, 1.f, + 2.f, 2.f, 2.f, 3.f, + 2.f, 2.f, 2.f, 3.f, + 4.f, 4.f, 4.f, 5.f, + + 6.f, 6.f, 6.f, 7.f, + 6.f, 6.f, 6.f, 7.f, + 6.f, 6.f, 6.f, 7.f, + 8.f, 8.f, 8.f, 9.f, + 8.f, 8.f, 8.f, 9.f, + 10.f, 10.f, 10.f, 11.f, + + 12.f, 12.f, 12.f, 13.f, + 12.f, 12.f, 12.f, 13.f, + 12.f, 12.f, 12.f, 13.f, + 14.f, 14.f, 14.f, 15.f, + 14.f, 14.f, 14.f, 15.f, + 16.f, 16.f, 16.f, 17.f, + + 18.f, 18.f, 18.f, 19.f, + 18.f, 18.f, 18.f, 19.f, + 18.f, 18.f, 18.f, 19.f, + 20.f, 20.f, 20.f, 21.f, + 20.f, 20.f, 20.f, 21.f, + 22.f, 22.f, 22.f, 23.f, + }; + + for (int i = 0; i < 2; ++i) { // B + for (int j = 0; j < 2; ++j) { // F + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X + auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6; + EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id; + } + } + } + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel; + nearest_mode nm = nearest_mode::simple; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + float answers[96] = { + 0.f, 0.f, 0.f, 1.f, + 0.f, 0.f, 0.f, 1.f, + 0.f, 0.f, 0.f, 1.f, + 2.f, 2.f, 2.f, 3.f, + 2.f, 2.f, 2.f, 3.f, + 4.f, 4.f, 4.f, 5.f, + + 6.f, 6.f, 6.f, 7.f, + 6.f, 6.f, 6.f, 7.f, + 6.f, 6.f, 6.f, 7.f, + 8.f, 8.f, 8.f, 9.f, + 8.f, 8.f, 8.f, 9.f, + 10.f, 10.f, 10.f, 11.f, + + 12.f, 12.f, 12.f, 13.f, + 12.f, 12.f, 12.f, 13.f, + 12.f, 12.f, 12.f, 13.f, + 14.f, 14.f, 14.f, 15.f, + 14.f, 14.f, 14.f, 15.f, + 16.f, 16.f, 16.f, 17.f, + + 18.f, 18.f, 18.f, 19.f, + 18.f, 18.f, 18.f, 19.f, + 18.f, 18.f, 18.f, 19.f, + 20.f, 20.f, 20.f, 21.f, + 20.f, 20.f, 20.f, 21.f, + 22.f, 22.f, 22.f, 23.f, + }; + + for (int i = 0; i < 2; ++i) { // B + for (int j = 0; j < 2; ++j) { // F + for (int k = 0; k < 4; ++k) { // Y + for (int l = 0; l < 6; ++l) { // X + auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6; + EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id; + } + } + } + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 2; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel; + nearest_mode nm = nearest_mode::round_prefer_floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0.f, 0.f, 1.f, + 4.f, 4.f, 5.f, + + 6.f, 6.f, 7.f, + 10.f, 10.f, 11.f, + + 12.f, 12.f, 13.f, + 16.f, 16.f, 17.f, + + 18.f, 18.f, 19.f, + 22.f, 22.f, 23.f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 1; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::pytorch_half_pixel; + nearest_mode nm = nearest_mode::round_prefer_floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0.f, 0.f, 1.f, + 6.f, 6.f, 7.f, + + 12.f, 12.f, 13.f, + 18.f, 18.f, 19.f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 2; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::asymmetric; + nearest_mode nm = nearest_mode::round_prefer_floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0.f, 1.f, 1.f, + 2.f, 3.f, 3.f, + + 6.f, 7.f, 7.f, + 8.f, 9.f, 9.f, + + 12.f, 13.f, 13.f, + 14.f, 15.f, 15.f, + + 18.f, 19.f, 19.f, + 20.f, 21.f, 21.f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 2; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::tf_half_pixel_for_nn; + nearest_mode nm = nearest_mode::round_prefer_floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 2.f, 3.f, 3.f, + 4.f, 5.f, 5.f, + + 8.f, 9.f, 9.f, + 10.f, 11.f, 11.f, + + 14.f, 15.f, 15.f, + 16.f, 17.f, 17.f, + + 20.f, 21.f, 21.f, + 22.f, 23.f, 23.f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 2; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::nearest; + coordinate_transformation_mode ctm = coordinate_transformation_mode::align_corners; + nearest_mode nm = nearest_mode::round_prefer_floor; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0.f, 0.f, 1.f, + 4.f, 4.f, 5.f, + + 6.f, 6.f, 7.f, + 10.f, 10.f, 11.f, + + 12.f, 12.f, 13.f, + 16.f, 16.f, 17.f, + + 18.f, 18.f, 19.f, + 22.f, 22.f, 23.f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 2; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::cubic; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0.29600694f, 0.8828125f, 1.46961806f, + 3.53038194f, 4.1171875f, 4.70399306f, + + 6.29600694f, 6.8828125f, 7.46961806f, + 9.53038194f, 10.1171875f, 10.70399306f, + + 12.29600694f, 12.8828125f, 13.46961806f, + 15.53038194f, 16.1171875f, 16.70399306f, + + 18.29600694f, 18.8828125f, 19.46961806f, + 21.53038194f, 22.1171875f, 22.70399306f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 1; + int f = 1; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::cubic; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode)); + + set_values(input, { + 5.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 5.34722222f, 3.f, 0.65277778f, + 1.91319444f, 2.5f, 3.08680556f, + 3.91319444f, 4.5f, 5.08680556f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_linear) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 2; + int f = 2; + int y = 3; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 2; + x = 3; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::caffe_bilinear; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode)); + + set_values(input, { + 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0.5f, 1.f, 1.5f, + 3.5f, 4.f, 4.5f, + + 6.5f, 7.f, 7.5f, + 9.5f, 10.f, 10.5f, + + 12.5f, 13.f, 13.5f, + 15.5f, 16.f, 16.5f, + + 18.5f, 19.f, 19.5f, + 21.5f, 22.f, 22.5f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in2x2x3x2_linear_onnx) { + // Input : 2x2x3x2 + // Output : 2x2x6x4 + // Sample Type: Nearest + + const auto& engine = get_test_engine(); + + int b = 1; + int f = 1; + int y = 2; + int x = 2; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 4; + x = 4; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::bilinear; + coordinate_transformation_mode ctm = coordinate_transformation_mode::asymmetric; + resample::AxesAndScales axesAndScales; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm)); + + set_values(input, { + 1.f, 2.f, + 3.f, 4.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 1.f, 1.33333f, 1.66667f, 2.f, + 1.66667f, 2.f, 2.33333f, 2.66667f, + 2.33333f, 2.66667f, 3.f, 3.33333f, + 3.f, 3.33333f, 3.66667f, 4.f, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { + // Input : 1x1x2x4 + // Output : 1x1x1x2 + // Sample Type: Linear + + const auto& engine = get_test_engine(); + + int b = 1; + int f = 1; + int y = 2; + int x = 4; + tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + + y = 1; + x = 2; + auto output_size = tensor(batch(b), feature(f), spatial(x, y)); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + int32_t antialias = 0; + float cube_coeff = -0.75f; + resample_type mode = resample_type::caffe_bilinear; + resample::AxesAndScales axesAndScales = { + {cldnn::resample::resample_axis::along_y, 0.6f}, + {cldnn::resample::resample_axis::along_x, 0.6f}, + }; + shape_calculation_mode shapeCalcMode = shape_calculation_mode::scales; + topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode)); + + set_values(input, { + 1.f, 2.f, 3.f, 4.f, + 5.f, 6.f, 7.f, 8.f, + }); + + cldnn::network net {engine, topology }; + + net.set_input_data("input", input); + + auto outputs = net.execute(); + + auto output = outputs.at("interpolate").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 2.6666665f, 4.3333331f + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp index 8195a07..703ffea 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp @@ -590,4 +590,4 @@ namespace ngraph } } } -} \ No newline at end of file +} -- 2.7.4