Support operation Interpolate-4 in OpenVINO (#1596)
authorEgor Churaev <egor.churaev@intel.com>
Thu, 1 Oct 2020 08:41:51 +0000 (11:41 +0300)
committerGitHub <noreply@github.com>
Thu, 1 Oct 2020 08:41:51 +0000 (11:41 +0300)
JIRA: 26973

24 files changed:
inference-engine/src/cldnn_engine/cldnn_program.cpp
inference-engine/src/cldnn_engine/cldnn_program.h
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp
inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/interpolate.hpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/interpolate.cpp
inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp
inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp
inference-engine/thirdparty/clDNN/api/resample.hpp
inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_opt.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h
inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp
inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h
inference-engine/thirdparty/clDNN/src/resample.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp
ngraph/core/reference/include/ngraph/runtime/reference/interpolate.hpp

index d6405ed..b78288b 100644 (file)
@@ -770,6 +770,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
         { "Copy" , Copy },
         { "Resample" , Resample },
         { "Interp" , Interp },
+        { "Interpolate" , Interpolate },
         { "RegionYolo" , RegionYolo },
         { "ReorgYolo" , ReorgYolo },
         { "Const" , ConstantBlob },
@@ -1469,6 +1470,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
             break;
         case Interp: CreateInterpPrimitive(topology, layer);
             break;
+        case Interpolate: CreateInterpolatePrimitive(topology, layer);
+            break;
         case ArgMax:
         case ArgMin:
             CreateArgMaxMinPrimitive(topology, layer, LayerTypeFromStr(layer->type));
@@ -3342,8 +3345,14 @@ void Program::CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::
     auto outDims = layer->outData[0]->getTensorDesc().getDims();
     auto outTensor = CldnnTensorFromIEDims(outDims);
 
+    std::vector<int> pads_begin(outDims.size(), 0);
+    std::vector<int> pads_end(outDims.size(), 0);
     int pad_begin = interpLayer->GetParamAsInt("pad_beg_", 0);
     int pad_end = interpLayer->GetParamAsInt("pad_end_", 0);
+    for (size_t i = 2; i < pads_begin.size(); ++i) {
+        pads_begin[i] = pad_begin;
+        pads_end[i] = pad_end;
+    }
     int align_corners = interpLayer->GetParamAsInt("align_corners", 1);
 
     std::string resampleLayerName = layer_type_name_ID(layer);
@@ -3352,8 +3361,8 @@ void Program::CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::
         resampleLayerName,
         inputPrimitives[0],
         outTensor,
-        pad_begin,
-        pad_end,
+        pads_begin,
+        pads_end,
         align_corners,
         cldnn::resample_type::bilinear);
 
@@ -3361,6 +3370,202 @@ void Program::CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::
     AddPrimitiveToProfiler(resampleLayerName, layer);
 }
 
+static cldnn::coordinate_transformation_mode CoordinateTransformationModeFromString(const std::string &str) {
+    static const caseless_map<std::string, cldnn::coordinate_transformation_mode> CoordTransformationMode = {
+        { "half_pixel" , cldnn::coordinate_transformation_mode::half_pixel },
+        { "pytorch_half_pixel" , cldnn::coordinate_transformation_mode::pytorch_half_pixel },
+        { "asymmetric" , cldnn::coordinate_transformation_mode::asymmetric },
+        { "tf_half_pixel_for_nn" , cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn },
+        { "align_corners" , cldnn::coordinate_transformation_mode::align_corners },
+    };
+    auto it = CoordTransformationMode.find(str);
+    if (it != CoordTransformationMode.end())
+        return it->second;
+    else
+        THROW_CLDNN_EXCEPTION("Unknown coordinate transformation mode: " << str);
+}
+
+static cldnn::nearest_mode NearestModeFromString(const std::string &str) {
+    static const caseless_map<std::string, cldnn::nearest_mode> NearestMode = {
+        { "round_prefer_floor" , cldnn::nearest_mode::round_prefer_floor },
+        { "round_prefer_ceil" , cldnn::nearest_mode::round_prefer_ceil },
+        { "floor" , cldnn::nearest_mode::floor },
+        { "ceil" , cldnn::nearest_mode::ceil },
+        { "simple" , cldnn::nearest_mode::simple },
+    };
+    auto it = NearestMode.find(str);
+    if (it != NearestMode.end())
+        return it->second;
+    else
+        THROW_CLDNN_EXCEPTION("Unknown nearest mode: " << str);
+}
+
+static cldnn::shape_calculation_mode ShapeCalculationModeFromString(const std::string &str) {
+    static const caseless_map<std::string, cldnn::shape_calculation_mode> shapeCalcMode = {
+        { "sizes" , cldnn::shape_calculation_mode::sizes },
+        { "scales" , cldnn::shape_calculation_mode::scales },
+    };
+    auto it = shapeCalcMode.find(str);
+    if (it != shapeCalcMode.end())
+        return it->second;
+    else
+        THROW_CLDNN_EXCEPTION("Unknown shape calculation mode: " << str);
+}
+
+inline cldnn::resample::resample_axis InterpolateAxisFromIEAxis(int axis, unsigned sz) {
+    if (axis < 0)
+        axis += sz;
+    if (axis < 0 || axis >= sz)
+        THROW_CLDNN_EXCEPTION("Interpolate axis is not correspond to number of dimensions");
+
+    // Difference in dimension ordering between IE and clDNN,
+    // reverse spatial dimensions after batch and feature.
+    unsigned cldnn_axis = axis;
+    if (axis >= 2) {
+        auto spatial_axis = axis - 2;
+        // Default and minimum number of dimensions is 4
+        auto spatial_size = std::max(sz, 4u) - 2;
+        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
+    }
+
+    switch (cldnn_axis) {
+        case 0:
+            return cldnn::resample::resample_axis::along_b;
+        case 1:
+            return cldnn::resample::resample_axis::along_f;
+        case 2:
+            return cldnn::resample::resample_axis::along_x;
+        case 3:
+            return cldnn::resample::resample_axis::along_y;
+        case 4:
+            return cldnn::resample::resample_axis::along_z;
+        case 5:
+            return cldnn::resample::resample_axis::along_w;
+        default:
+            break;
+    }
+    THROW_CLDNN_EXCEPTION("Unsupported Interpolate axis: " << axis);
+}
+
+void Program::CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, {3, 4});
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto interpolateLayer = as<InferenceEngine::GenericLayer*> (layer);
+
+    std::shared_ptr<Data> insData0 = layer->insData[0].lock();
+    IE_ASSERT(insData0 != nullptr);
+    auto insData0dims = insData0->getTensorDesc().getDims();
+    auto outDims = layer->outData[0]->getTensorDesc().getDims();
+    auto outTensor = CldnnTensorFromIEDims(outDims);
+
+    auto pads_begin = interpolateLayer->GetParamAsInts("pads_begin", {});
+    auto pads_end = interpolateLayer->GetParamAsInts("pads_end", {});
+    for (size_t i = pads_begin.size(); i < outDims.size() || i < 4; ++i)
+        pads_begin.push_back(0);
+    for (size_t i = pads_end.size(); i < outDims.size() || i < 4; ++i)
+        pads_end.push_back(0);
+    std::string mode = interpolateLayer->GetParamAsString("mode");
+    std::string shape_calc_mode = interpolateLayer->GetParamAsString("shape_calculation_mode");
+    std::string coordinate_trans_mode = interpolateLayer->GetParamAsString("coordinate_transformation_mode", "half_pixel");
+    std::string nearest_mode = interpolateLayer->GetParamAsString("nearest_mode", "round_prefer_floor");
+    int antialias = interpolateLayer->GetParamAsBool("antialias", false);
+    float cube_coeff = interpolateLayer->GetParamAsFloat("cube_coeff", -0.75f);
+
+    std::string resampleLayerName = layer_type_name_ID(layer);
+    auto cldnnSampleType = ResampleTypeFromString(mode);
+    auto shapeCalcMode = ShapeCalculationModeFromString(shape_calc_mode);
+    auto coordTransMode = CoordinateTransformationModeFromString(coordinate_trans_mode);
+    auto nearestMode = NearestModeFromString(nearest_mode);
+
+    std::vector<float> scales;
+    auto scalesInput = layer->insData[2].lock();
+    auto scalesInputCreator = getCreatorLayer(scalesInput).lock();
+    if (scalesInputCreator->blobs.size() == 1) {
+        auto constantBlob = scalesInputCreator->blobs.begin()->second;
+        auto axesPrecision = constantBlob->getTensorDesc().getPrecision();
+        if (axesPrecision == InferenceEngine::Precision::FP32) {
+            auto data = constantBlob->buffer().as<float*>();
+            for (size_t i = 0; i < constantBlob->size(); ++i)
+                scales.push_back(data[i]);
+        } else {
+            THROW_IE_EXCEPTION << layer->name << " Incorrect scales input precision";
+        }
+    }
+
+    std::vector<cldnn::resample::resample_axis> axes;
+    if (inputPrimitives.size() == 4) {
+        auto axesInput = layer->insData[3].lock();
+        auto axesInputCreator = getCreatorLayer(axesInput).lock();
+        if (axesInputCreator->blobs.size() == 1) {
+            auto constantBlob = axesInputCreator->blobs.begin()->second;
+            auto axesPrecision = constantBlob->getTensorDesc().getPrecision();
+            if (axesPrecision == InferenceEngine::Precision::I32) {
+                auto data = constantBlob->buffer().as<int32_t*>();
+                for (size_t i = 0; i < constantBlob->size(); ++i)
+                    axes.push_back(InterpolateAxisFromIEAxis(data[i], insData0dims.size()));
+            } else if (axesPrecision == InferenceEngine::Precision::I64) {
+                auto data = constantBlob->buffer().as<int64_t*>();
+                for (size_t i = 0; i < constantBlob->size(); ++i)
+                    axes.push_back(InterpolateAxisFromIEAxis(static_cast<int32_t>(data[i]), insData0dims.size()));
+            } else {
+                THROW_IE_EXCEPTION << layer->name
+                                   << " Incorrect axes input precision";
+            }
+        }
+    } else {
+        for (int i = 0; i < insData0dims.size(); ++i) {
+            axes.push_back(InterpolateAxisFromIEAxis(i, insData0dims.size()));
+        }
+    }
+
+    if (axes.size() != scales.size())
+        THROW_IE_EXCEPTION << layer->name << " Incorrect axes and scales should be the same size";
+
+    cldnn::resample::AxesAndScales axesAndScales;
+    for (size_t i = 0; i < axes.size(); ++i) {
+        axesAndScales[axes[i]] = scales[i];
+    }
+
+    if (cldnnSampleType == cldnn::resample_type::linear_onnx) {
+        if (insData0dims.size() != 2 && insData0dims.size() != 4)
+            THROW_CLDNN_EXCEPTION("mode 'linear_onnx' supports only 2D or 4D tensors");
+        if (axes.size() != 2 && insData0dims.size() != axes.size())
+            THROW_CLDNN_EXCEPTION("mode 'linear_onnx' supports only axes with size 2 or equal to input rank");
+        bool correctAxes =
+            ((axes[0] == cldnn::resample::resample_axis::along_b) &&
+             (axes[1] == cldnn::resample::resample_axis::along_f)) ||
+            ((axes[0] == cldnn::resample::resample_axis::along_y) &&
+             (axes[1] == cldnn::resample::resample_axis::along_x));
+        if (axes.size() == 4 && insData0dims.size() == 4) {
+            correctAxes = axes[0] == cldnn::resample::resample_axis::along_b &&
+                          axes[1] == cldnn::resample::resample_axis::along_f &&
+                          axes[2] == cldnn::resample::resample_axis::along_y &&
+                          axes[3] == cldnn::resample::resample_axis::along_x;
+        }
+        if (!correctAxes)
+            THROW_CLDNN_EXCEPTION(
+                "mode 'linear_onnx' supports only case when axes = {2, 3} or "
+                "axes = {0, 1} or axes = {0, 1, 2, 3}");
+    }
+
+    auto resamplePrim = cldnn::resample(
+        resampleLayerName,
+        inputPrimitives[0],
+        outTensor,
+        axesAndScales,
+        pads_begin,
+        pads_end,
+        antialias,
+        cube_coeff,
+        cldnnSampleType,
+        shapeCalcMode,
+        coordTransMode,
+        nearestMode);
+
+    topology.add(resamplePrim);
+    AddPrimitiveToProfiler(resampleLayerName, layer);
+}
+
 void Program::CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
@@ -5676,6 +5881,10 @@ cldnn::resample_type Program::ResampleTypeFromString(const std::string &str) {
         { "caffe.ResampleParameter.LINEAR" , cldnn::resample_type::caffe_bilinear },
         { "caffe.ResampleParameter.NEAREST" , cldnn::resample_type::nearest },
         { "Interp" , cldnn::resample_type::bilinear },
+        { "linear" , cldnn::resample_type::caffe_bilinear },
+        { "linear_onnx" , cldnn::resample_type::linear_onnx },
+        { "cubic" , cldnn::resample_type::cubic },
+        { "nearest" , cldnn::resample_type::nearest },
     };
     auto it = UpsamplingTypeNameToType.find(str);
     if (it != UpsamplingTypeNameToType.end())
index 989a3c6..67a466c 100644 (file)
@@ -163,6 +163,7 @@ public:
         Copy,
         Resample,
         Interp,
+        Interpolate,
         RegionYolo,
         ReorgYolo,
         ConstantBlob,
@@ -346,6 +347,7 @@ private:
     void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
index b11efc5..1c0e1c7 100644 (file)
@@ -42,6 +42,11 @@ const std::vector<ngraph::op::v4::Interpolate::CoordinateTransformMode> coordina
         ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners,
 };
 
+const std::vector<ngraph::op::v4::Interpolate::ShapeCalcMode> shapeCalculationMode = {
+        ngraph::op::v4::Interpolate::ShapeCalcMode::sizes,
+        ngraph::op::v4::Interpolate::ShapeCalcMode::scales,
+};
+
 const std::vector<ngraph::op::v4::Interpolate::NearestMode> nearestModes = {
         ngraph::op::v4::Interpolate::NearestMode::simple,
         ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
@@ -55,7 +60,7 @@ const std::vector<ngraph::op::v4::Interpolate::NearestMode> defaultNearestMode =
 };
 
 const std::vector<std::vector<size_t>> pads = {
-        // {0, 0, 1, 1},
+        {0, 0, 1, 1},
         {0, 0, 0, 0},
 };
 
@@ -69,23 +74,37 @@ const std::vector<double> cubeCoefs = {
         -0.75f,
 };
 
+const std::vector<std::vector<int64_t>> defaultAxes = {
+    {2, 3}
+};
+
+const std::vector<std::vector<float>> defaultScales = {
+    {1.33333f, 1.33333f}
+};
+
 const auto interpolateCasesWithoutNearest = ::testing::Combine(
         ::testing::ValuesIn(modesWithoutNearest),
+        ::testing::ValuesIn(shapeCalculationMode),
         ::testing::ValuesIn(coordinateTransformModes),
         ::testing::ValuesIn(defaultNearestMode),
         ::testing::ValuesIn(antialias),
         ::testing::ValuesIn(pads),
         ::testing::ValuesIn(pads),
-        ::testing::ValuesIn(cubeCoefs));
+        ::testing::ValuesIn(cubeCoefs),
+        ::testing::ValuesIn(defaultAxes),
+        ::testing::ValuesIn(defaultScales));
 
 const auto interpolateCases = ::testing::Combine(
         ::testing::ValuesIn(nearestMode),
+        ::testing::ValuesIn(shapeCalculationMode),
         ::testing::ValuesIn(coordinateTransformModes),
         ::testing::ValuesIn(nearestModes),
         ::testing::ValuesIn(antialias),
         ::testing::ValuesIn(pads),
         ::testing::ValuesIn(pads),
-        ::testing::ValuesIn(cubeCoefs));
+        ::testing::ValuesIn(cubeCoefs),
+        ::testing::ValuesIn(defaultAxes),
+        ::testing::ValuesIn(defaultScales));
 
 INSTANTIATE_TEST_CASE_P(Interpolate_Basic, InterpolateLayerTest, ::testing::Combine(
         interpolateCasesWithoutNearest,
index bc0a1db..1904903 100644 (file)
@@ -46,33 +46,37 @@ protected:
         std::tie(interpolateParams, netPrecision, inputShape, targetShape, targetDevice) = basicParamsSet;
 
         ngraph::op::v4::Interpolate::InterpolateMode mode;
+        ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode;
         ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode;
         ngraph::op::v4::Interpolate::NearestMode nearestMode;
         bool antialias;
         std::vector<size_t> padBegin, padEnd;
         double cubeCoef;
-        std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams;
+        std::vector<int64_t> axes;
+        std::vector<float> scales;
+        std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams;
 
         using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode;
-        ShapeCalcMode shape_calc_mode = ShapeCalcMode::sizes;
 
         auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
         auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
 
         auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape);
 
-        std::vector<float> scales(targetShape.size(), 1.0f);
         auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales);
 
         auto scalesInput = std::make_shared<ngraph::opset3::Constant>(scales_const);
 
         auto secondaryInput = std::make_shared<ngraph::opset3::Constant>(constant);
 
-        ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shape_calc_mode, padBegin,
+        auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes);
+        auto axesInput = std::make_shared<ngraph::opset3::Constant>(axesConst);
+        ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin,
             padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef};
         auto interpolate = std::make_shared<ngraph::op::v4::Interpolate>(params[0],
                                                                          secondaryInput,
                                                                          scalesInput,
+                                                                         axesInput,
                                                                          interpolateAttributes);
         interpolate->get_rt_info() = CPUTestsBase::setCPUInfo(inFmts, outFmts, priority);
         const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(interpolate)};
@@ -126,6 +130,11 @@ const std::vector<ngraph::op::v4::Interpolate::CoordinateTransformMode> coordina
         ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners,
 };
 
+const std::vector<ngraph::op::v4::Interpolate::ShapeCalcMode> shapeCalculationMode = {
+        ngraph::op::v4::Interpolate::ShapeCalcMode::sizes,
+        ngraph::op::v4::Interpolate::ShapeCalcMode::scales,
+};
+
 const std::vector<ngraph::op::v4::Interpolate::NearestMode> nearestModes = {
         ngraph::op::v4::Interpolate::NearestMode::simple,
         ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
@@ -150,23 +159,37 @@ const std::vector<double> cubeCoefs = {
         -0.75f,
 };
 
+const std::vector<std::vector<int64_t>> defaultAxes = {
+    {2, 3}
+};
+
+const std::vector<std::vector<float>> defaultScales = {
+    {1.25f, 1.5f}
+};
+
 const auto interpolateCasesNN = ::testing::Combine(
         ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest),
+        ::testing::ValuesIn(shapeCalculationMode),
         ::testing::ValuesIn(coordinateTransformModes),
         ::testing::ValuesIn(nearestModes),
         ::testing::ValuesIn(antialias),
         ::testing::ValuesIn(pads),
         ::testing::ValuesIn(pads),
-        ::testing::ValuesIn(cubeCoefs));
+        ::testing::ValuesIn(cubeCoefs),
+        ::testing::ValuesIn(defaultAxes),
+        ::testing::ValuesIn(defaultScales));
 
 const auto interpolateCasesLinearOnnx = ::testing::Combine(
         ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx),
+        ::testing::ValuesIn(shapeCalculationMode),
         ::testing::ValuesIn(coordinateTransformModes),
         ::testing::ValuesIn(defNearestModes),
         ::testing::ValuesIn(antialias),
         ::testing::ValuesIn(pads),
         ::testing::ValuesIn(pads),
-        ::testing::ValuesIn(cubeCoefs));
+        ::testing::ValuesIn(cubeCoefs),
+        ::testing::ValuesIn(defaultAxes),
+        ::testing::ValuesIn(defaultScales));
 
 INSTANTIATE_TEST_CASE_P(InterpolateNN_Layout_Test, InterpolateLayerCPUTest,
         ::testing::Combine(
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/interpolate.cpp
new file mode 100644 (file)
index 0000000..499a879
--- /dev/null
@@ -0,0 +1,125 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/interpolate.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> prc = {
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::FP32,
+};
+
+const std::vector<std::vector<size_t>> inShapes = {
+        {1, 1, 23, 23},
+};
+
+const std::vector<std::vector<size_t>> targetShapes = {
+        {1, 1, 46, 46},
+};
+
+const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> modesWithoutNearest = {
+        ngraph::op::v4::Interpolate::InterpolateMode::linear,
+        ngraph::op::v4::Interpolate::InterpolateMode::cubic,
+        ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> nearestMode = {
+        ngraph::op::v4::Interpolate::InterpolateMode::nearest,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::CoordinateTransformMode> coordinateTransformModes = {
+        ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn,
+        ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel,
+        ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel,
+        ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric,
+        ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::ShapeCalcMode> shapeCalculationMode = {
+        ngraph::op::v4::Interpolate::ShapeCalcMode::sizes,
+        ngraph::op::v4::Interpolate::ShapeCalcMode::scales,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::NearestMode> nearestModes = {
+        ngraph::op::v4::Interpolate::NearestMode::simple,
+        ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
+        ngraph::op::v4::Interpolate::NearestMode::floor,
+        ngraph::op::v4::Interpolate::NearestMode::ceil,
+        ngraph::op::v4::Interpolate::NearestMode::round_prefer_ceil,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::NearestMode> defaultNearestMode = {
+        ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
+};
+
+const std::vector<std::vector<size_t>> pads = {
+        {0, 0, 1, 1},
+        {0, 0, 0, 0},
+};
+
+const std::vector<bool> antialias = {
+// Not enabled in Inference Engine
+//        true,
+        false,
+};
+
+const std::vector<double> cubeCoefs = {
+        -0.75f,
+};
+
+const std::vector<std::vector<int64_t>> defaultAxes = {
+    {0, 1, 2, 3}
+};
+
+const std::vector<std::vector<float>> defaultScales = {
+    {1.f, 1.f, 2.f, 2.f}
+};
+
+const auto interpolateCasesWithoutNearest = ::testing::Combine(
+        ::testing::ValuesIn(modesWithoutNearest),
+        ::testing::ValuesIn(shapeCalculationMode),
+        ::testing::ValuesIn(coordinateTransformModes),
+        ::testing::ValuesIn(defaultNearestMode),
+        ::testing::ValuesIn(antialias),
+        ::testing::ValuesIn(pads),
+        ::testing::ValuesIn(pads),
+        ::testing::ValuesIn(cubeCoefs),
+        ::testing::ValuesIn(defaultAxes),
+        ::testing::ValuesIn(defaultScales));
+
+const auto interpolateCasesNearesMode = ::testing::Combine(
+        ::testing::ValuesIn(nearestMode),
+        ::testing::ValuesIn(shapeCalculationMode),
+        ::testing::ValuesIn(coordinateTransformModes),
+        ::testing::ValuesIn(nearestModes),
+        ::testing::ValuesIn(antialias),
+        ::testing::ValuesIn(pads),
+        ::testing::ValuesIn(pads),
+        ::testing::ValuesIn(cubeCoefs),
+        ::testing::ValuesIn(defaultAxes),
+        ::testing::ValuesIn(defaultScales));
+
+INSTANTIATE_TEST_CASE_P(Interpolate_Basic, InterpolateLayerTest, ::testing::Combine(
+        interpolateCasesWithoutNearest,
+        ::testing::ValuesIn(prc),
+        ::testing::ValuesIn(inShapes),
+        ::testing::ValuesIn(targetShapes),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+    InterpolateLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(Interpolate_Nearest, InterpolateLayerTest, ::testing::Combine(
+        interpolateCasesNearesMode,
+        ::testing::ValuesIn(prc),
+        ::testing::ValuesIn(inShapes),
+        ::testing::ValuesIn(targetShapes),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+    InterpolateLayerTest::getTestCaseName);
+
+} // namespace
index 9cd14ea..5fb4657 100644 (file)
@@ -17,5 +17,7 @@ std::vector<std::string> disabledTestPatterns() {
             R"(.*(ComparisonLayerTest).*)",
             // TODO: Issue: 39014
             R"(.*CoreThreadingTestsWithIterations.*smoke_LoadNetwork.*)",
+            // TODO: Issue: 39612
+            R"(.*Interpolate.*cubic.*tf_half_pixel_for_nn.*FP16.*)",
     };
-}
\ No newline at end of file
+}
index 835d75c..17b6386 100644 (file)
@@ -18,12 +18,15 @@ namespace LayerTestsDefinitions {
 
 typedef std::tuple<
         ngraph::op::v4::Interpolate::InterpolateMode,          // InterpolateMode
+        ngraph::op::v4::Interpolate::ShapeCalcMode,            // ShapeCalculationMode
         ngraph::op::v4::Interpolate::CoordinateTransformMode,  // CoordinateTransformMode
         ngraph::op::v4::Interpolate::NearestMode,              // NearestMode
         bool,                                                  // AntiAlias
         std::vector<size_t>,                                   // PadBegin
         std::vector<size_t>,                                   // PadEnd
-        double                                                 // Cube coef
+        double,                                                // Cube coef
+        std::vector<int64_t>,                                  // Axes
+        std::vector<float>                                     // Scales
 > InterpolateSpecificParams;
 
 typedef std::tuple<
index 08aabc3..db594c8 100644 (file)
@@ -26,22 +26,28 @@ std::string InterpolateLayerTest::getTestCaseName(testing::TestParamInfo<Interpo
     std::string targetDevice;
     std::tie(interpolateParams, netPrecision, inputShapes, targetShapes, targetDevice) = obj.param;
     std::vector<size_t> padBegin, padEnd;
+    std::vector<int64_t> axes;
+    std::vector<float> scales;
     bool antialias;
     ngraph::op::v4::Interpolate::InterpolateMode mode;
+    ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode;
     ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode;
     ngraph::op::v4::Interpolate::NearestMode nearestMode;
     double cubeCoef;
-    std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams;
+    std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams;
     std::ostringstream result;
     result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
     result << "TS=" << CommonTestUtils::vec2str(targetShapes) << "_";
     result << "InterpolateMode=" << mode << "_";
+    result << "ShapeCalcMode=" << shapeCalcMode << "_";
     result << "CoordinateTransformMode=" << coordinateTransformMode << "_";
     result << "NearestMode=" << nearestMode << "_";
     result << "CubeCoef=" << cubeCoef << "_";
     result << "Antialias=" << antialias << "_";
     result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_";
     result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_";
+    result << "Axes=" << CommonTestUtils::vec2str(axes) << "_";
+    result << "Scales=" << CommonTestUtils::vec2str(scales) << "_";
     result << "netPRC=" << netPrecision.name() << "_";
     result << "targetDevice=" << targetDevice;
     return result.str();
@@ -54,33 +60,35 @@ void InterpolateLayerTest::SetUp() {
 
     std::tie(interpolateParams, netPrecision, inputShape, targetShape, targetDevice) = this->GetParam();
     std::vector<size_t> padBegin, padEnd;
+    std::vector<int64_t> axes;
+    std::vector<float> scales;
     bool antialias;
     ngraph::op::v4::Interpolate::InterpolateMode mode;
+    ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode;
     ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode;
     ngraph::op::v4::Interpolate::NearestMode nearestMode;
 
-    using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode;
-    ShapeCalcMode shape_calc_node = ShapeCalcMode::sizes;
     double cubeCoef;
-    std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams;
+    std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams;
 
     auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
     auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
 
-    auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape);
+    auto sizesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape);
+    auto sizesInput = std::make_shared<ngraph::opset3::Constant>(sizesConst);
 
-    std::vector<float> scales(targetShape.size(), 1.0f);
     auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales);
-
     auto scalesInput = std::make_shared<ngraph::opset3::Constant>(scales_const);
 
-    auto secondaryInput = std::make_shared<ngraph::opset3::Constant>(constant);
+    auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes);
+    auto axesInput = std::make_shared<ngraph::opset3::Constant>(axesConst);
 
-    ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shape_calc_node, padBegin,
+    ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin,
         padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef};
     auto interpolate = std::make_shared<ngraph::op::v4::Interpolate>(params[0],
-                                                                     secondaryInput,
+                                                                     sizesInput,
                                                                      scalesInput,
+                                                                     axesInput,
                                                                      interpolateAttributes);
     const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(interpolate)};
     function = std::make_shared<ngraph::Function>(results, params, "interpolate");
index 5dc3878..32f767c 100644 (file)
@@ -252,5 +252,7 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::Coordin
 
 std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::NearestMode type);
 
+std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type);
+
 }  // namespace helpers
 }  // namespace ngraph
index 0e61048..4ab358e 100644 (file)
@@ -708,5 +708,19 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::Nearest
     return os;
 }
 
+std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type) {
+    switch (type) {
+        case ngraph::op::v4::Interpolate::ShapeCalcMode::scales:
+            os << "scales";
+            break;
+        case ngraph::op::v4::Interpolate::ShapeCalcMode::sizes:
+            os << "sizes";
+            break;
+        default:
+            throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
+    }
+    return os;
+}
+
 }  // namespace helpers
 }  // namespace ngraph
index 4527a35..add63e4 100644 (file)
@@ -18,6 +18,8 @@
 #pragma once
 #include "primitive.hpp"
 
+#include <map>
+
 namespace cldnn {
 /// @addtogroup cpp_api C++ API
 /// @{
@@ -33,7 +35,47 @@ enum class resample_type : int32_t {
     /// @brief bilinear interpolation.
     bilinear,
     /// @brief caffe bilinear interpolation.
-    caffe_bilinear
+    caffe_bilinear,
+    /// @brief cubic interpolation.
+    cubic,
+    /// @brief linear onnx interpolation.
+    linear_onnx
+};
+
+/// @brief Specifies which of inputs target_spatial_shape or scales is used to calculate an output shape
+enum class shape_calculation_mode : int32_t {
+    /// @brief output shape calculated based on sizes of input and output tensors
+    sizes,
+    /// @brief output shape calculated based on scales coefficients
+    scales
+};
+
+/// @brief Coordinate transformation mode for the @ref resample layer.
+enum class coordinate_transformation_mode : int32_t {
+    /// @brief the coordinate in the original tensor axis `x` is calculated as `((x_resized + 0.5) / scale[x]) - 0.5`.
+    half_pixel,
+    /// @brief the coordinate in the original tensor axis `x` is calculated by `(x_resized + 0.5) / scale[x] - 0.5 if output_shape[x] > 1 else 0.0`.
+    pytorch_half_pixel,
+    /// @brief the coordinate in the original tensor axis `x` is calculated according to the formula `x_resized / scale[x]`.
+    asymmetric,
+    /// @brief the coordinate in the original tensor axis `x` is `(x_resized + 0.5) / scale[x]`.
+    tf_half_pixel_for_nn,
+    /// @brief the coordinate in the original tensor axis `x` is calculated as `0 if output_shape[x] == 1 else x_resized * (input_shape[x] - 1) / (output_shape[x] - 1)`.
+    align_corners
+};
+
+/// @brief Nearest mode for the @ref resample layer.
+enum class nearest_mode : int32_t {
+    /// @brief this mode is known as round half down.
+    round_prefer_floor,
+    /// @brief it is round half up mode.
+    round_prefer_ceil,
+    /// @brief this mode computes the largest integer value not greater than rounded value.
+    floor,
+    /// @brief this mode computes the smallest integer value not less than rounded value
+    ceil,
+    /// @brief this mode behaves as `ceil` mode when `Interpolate` is downsample, and as dropping the fractional part otherwise.
+    simple
 };
 
 /// @brief Performs nearest neighbor/bilinear resample
@@ -41,6 +83,17 @@ enum class resample_type : int32_t {
 struct resample : public primitive_base<resample> {
     CLDNN_DECLARE_PRIMITIVE(resample)
 
+    enum resample_axis {
+        along_b,
+        along_f,
+        along_x,
+        along_y,
+        along_z,
+        along_w
+    };
+
+    using AxesAndScales = std::map<resample_axis, float>;
+
     /// @brief Constructs Resample primitive.
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -60,18 +113,23 @@ struct resample : public primitive_base<resample> {
         : primitive_base(id, {input}, output_padding),
           output_size(output_size),
           num_filter(num_filter),
-          pad_begin(0),
-          pad_end(0),
           align_corners(1),
           operation_type(operation_type),
+          shape_calc_mode(shape_calculation_mode::sizes),
           with_activation(with_activation),
-          activation_negative_slope(activation_slp) {}
+          activation_negative_slope(activation_slp),
+          coord_trans_mode(coordinate_transformation_mode::asymmetric),
+          round_mode(nearest_mode::floor) {
+        if (operation_type == resample_type::caffe_bilinear) {
+            coord_trans_mode = coordinate_transformation_mode::half_pixel;
+        }
+    }
 
     /// @brief Constructs Resample primitive with Interp operation.
     /// @param id This primitive id.
     /// @param input Input primitive id.
-    /// @param pad_begin Optional begin padding for input.
-    /// @param pad_end Optional end padding for input.
+    /// @param pads_begin Optional begin padding for input.
+    /// @param pads_end Optional end padding for input.
     /// @param align_corners Align corner pixels of the input and output tensors.
     /// @param resample_type Resample bilinear method.
     /// @param with_activation Enables Relu activation.
@@ -79,8 +137,8 @@ struct resample : public primitive_base<resample> {
     resample(const primitive_id& id,
              const primitive_id& input,
              tensor output_size,
-             int32_t pad_begin = 0,
-             int32_t pad_end = 0,
+             std::vector<int32_t> pads_begin = {},
+             std::vector<int32_t> pads_end = {},
              int32_t align_corners = 1,
              resample_type operation_type = resample_type::bilinear,
              bool with_activation = false,
@@ -89,29 +147,75 @@ struct resample : public primitive_base<resample> {
         : primitive_base(id, {input}, output_padding),
           output_size(output_size),
           num_filter(0),
-          pad_begin(pad_begin),
-          pad_end(pad_end),
+          pads_begin(pads_begin),
+          pads_end(pads_end),
           align_corners(align_corners),
           operation_type(operation_type),
+          shape_calc_mode(shape_calculation_mode::sizes),
           with_activation(with_activation),
-          activation_negative_slope(activation_slp) {}
+          activation_negative_slope(activation_slp),
+          coord_trans_mode(coordinate_transformation_mode::asymmetric),
+          round_mode(nearest_mode::floor) {}
+
+    /// @brief Constructs Resample primitive with Interpolate operation.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param pads_begin Optional begin padding for input.
+    /// @param pads_end Optional end padding for input.
+    resample(const primitive_id& id,
+             const primitive_id& input,
+             tensor output_size,
+             AxesAndScales axesAndScales,
+             std::vector<int32_t> pads_begin = {},
+             std::vector<int32_t> pads_end = {},
+             int32_t antialias = 0,
+             float cube_coeff = -0.75f,
+             resample_type mode = resample_type::caffe_bilinear,
+             shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes,
+             coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel,
+             nearest_mode nm = nearest_mode::round_prefer_floor,
+             const padding& output_padding = padding())
+        : primitive_base(id, {input}, output_padding),
+          output_size(output_size),
+          axesAndScales(axesAndScales),
+          pads_begin(pads_begin),
+          pads_end(pads_end),
+          operation_type(mode),
+          shape_calc_mode(shape_calc_mode),
+          with_activation(false),
+          antialias(antialias),
+          cube_coeff(cube_coeff),
+          coord_trans_mode(ctm),
+          round_mode(nm) {}
 
     /// @param scale Resample scale.
     tensor output_size;
     /// @param num_filter Input filter. Only used by bilinear sample_type.
     uint32_t num_filter;
-    /// @param pad_begin Begin padding for input.
-    int32_t pad_begin;
-    /// @param pad_end End padding for input.
-    int32_t pad_end;
+    /// @param scales scales for spatial axes.
+    AxesAndScales axesAndScales;
+    /// @param pads_begin Begin paddings for input.
+    std::vector<int32_t> pads_begin;
+    /// @param pads_end End paddings for input.
+    std::vector<int32_t> pads_end;
     /// @param align_corners corner pixels of the input and output tensors
     int32_t align_corners;
     /// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear).
     resample_type operation_type;
+    /// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape.
+    shape_calculation_mode shape_calc_mode;
     /// @brief Enables Relu activation.
     bool with_activation;
     /// @brief Relu activation slope.
     float activation_negative_slope;
+    /// @param antialias is a flag that specifies whether to perform anti-aliasing.
+    int32_t antialias;
+    /// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic.
+    float cube_coeff;
+    /// @param specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor
+    coordinate_transformation_mode coord_trans_mode;
+    /// @param specifies round mode when mode == nearest and is used only when mode == nearest.
+    nearest_mode round_mode;
 };
 /// @}
 /// @}
index 5308e41..85a2793 100644 (file)
@@ -378,6 +378,38 @@ enum class ResampleType {
     NEAREST_NEIGHBOR,
     BILINEAR_INTERP,
     CAFFE_BILINEAR_INTERP,
+    CUBIC,
+    LINEAR_ONNX,
+};
+
+enum class CoordinateTransformationMode {
+    HALF_PIXEL,
+    PYTORCH_HALF_PIXEL,
+    ASYMMETRIC,
+    TF_HALF_PIXEL_FOR_NN,
+    ALIGN_CORNERS,
+};
+
+enum class NearestMode {
+    ROUND_PREFER_FLOOR,
+    ROUND_PREFER_CEIL,
+    FLOOR,
+    CEIL,
+    SIMPLE,
+};
+
+enum class ShapeCalculationMode {
+    SIZES,
+    SCALES,
+};
+
+enum class InterpolateAxis {
+    X,
+    Y,
+    Z,
+    W,
+    FEATURE,
+    BATCH
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
index d44b0f3..6f933f4 100644 (file)
 #include <string>
 #include <algorithm>
 #include <vector>
+#include <unordered_map>
+#include <iostream>
+
+namespace {
+int getAxisIndex(kernel_selector::InterpolateAxis axis) {
+    switch(axis) {
+    case kernel_selector::InterpolateAxis::BATCH:
+        return 0;
+    case kernel_selector::InterpolateAxis::FEATURE:
+        return 1;
+    case kernel_selector::InterpolateAxis::Z:
+        return 2;
+    case kernel_selector::InterpolateAxis::Y:
+        return 3;
+    case kernel_selector::InterpolateAxis::X:
+        return 4;
+    default:
+        return 0;
+    }
+}
+}  // namespace
 
 namespace kernel_selector {
 
@@ -44,14 +65,16 @@ ResampleKernelBase::DispatchData ResampleKernelBase::SetDefault(const kernel_sel
 
     if (arg.resampleType == ResampleType::NEAREST_NEIGHBOR)
         global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
-    else if (arg.resampleType == ResampleType::BILINEAR_INTERP)
+    else if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX)
         global = {Align(out.X().v, 32), out.Y().v, out.Batch().v};
     else if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP)
         global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v};
+    else
+        global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
 
     local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
 
-    if (arg.resampleType == ResampleType::BILINEAR_INTERP) {
+    if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) {
         local[0] = 32;
         local[1] = 1;
         local[2] = 1;
@@ -102,41 +125,77 @@ JitConstants ResampleKernelBase::GetJitConstants(const resample_params& params)
     const auto& input = params.inputs[0];
     const auto& output = params.output;
     const auto align_corners = params.align_corners;
-    const auto pad_begin = params.pad_begin;
-    const auto pad_end = params.pad_end;
-    const auto x_size_padded = pad_begin + input.X().v + pad_end;
-    const auto y_size_padded = pad_begin + input.Y().v + pad_end;
-    const auto z_size_padded = pad_begin + input.Z().v + pad_end;
-    const auto out_x_size_padded = pad_begin + output.X().v + pad_end;
-    const auto out_y_size_padded = pad_begin + output.Y().v + pad_end;
-    const auto out_z_size_padded = pad_begin + output.Z().v + pad_end;
-    float x_ratio = 0;
-    float y_ratio = 0;
-    float z_ratio = 0;
+    auto pads_begin = params.pads_begin;
+    auto pads_end = params.pads_end;
+    if (pads_begin.size() == 4)
+        pads_begin.insert(std::next(pads_begin.begin(), 2), 0);
+    if (pads_end.size() == 4)
+        pads_end.insert(std::next(pads_end.begin(), 2), 0);
+
+    const auto b_size_padded = pads_begin[0] + input.Batch().v + pads_end[0];
+    const auto f_size_padded = pads_begin[1] + input.Feature().v + pads_end[1];
+    const auto x_size_padded = pads_begin[4] + input.X().v + pads_end[4];
+    const auto y_size_padded = pads_begin[3] + input.Y().v + pads_end[3];
+    const auto z_size_padded = pads_begin[2] + input.Z().v + pads_end[2];
+    const auto out_b_size_padded = output.Batch().v;
+    const auto out_f_size_padded = output.Feature().v;
+    const auto out_x_size_padded = output.X().v;
+    const auto out_y_size_padded = output.Y().v;
+    const auto out_z_size_padded = output.Z().v;
+    std::vector<float> scales(5);
+    std::vector<int32_t> axesUsed(5, 0);
+    bool paddingUsed = false;
+    for (size_t i = 0; i < pads_begin.size(); ++i) {
+        paddingUsed |= (pads_begin[i] != 0 || pads_end[i] != 0);
+    }
 
     if (align_corners) {
-        x_ratio = (out_x_size_padded) > 1 ? static_cast<float>(x_size_padded - 1) / static_cast<float>(out_x_size_padded - 1) : 0.0f;
-        y_ratio = (out_y_size_padded) > 1 ? static_cast<float>(y_size_padded - 1) / static_cast<float>(out_y_size_padded - 1) : 0.0f;
-        z_ratio = (out_z_size_padded) > 1 ? static_cast<float>(z_size_padded - 1) / static_cast<float>(out_z_size_padded - 1) : 0.0f;
+        scales[0] = (out_b_size_padded) > 1
+                        ? static_cast<float>(b_size_padded - 1) / static_cast<float>(out_b_size_padded - 1)
+                        : 0.0f;
+        scales[1] = (out_f_size_padded) > 1
+                        ? static_cast<float>(f_size_padded - 1) / static_cast<float>(out_f_size_padded - 1)
+                        : 0.0f;
+        scales[4] = (out_x_size_padded) > 1
+                        ? static_cast<float>(x_size_padded - 1) / static_cast<float>(out_x_size_padded - 1)
+                        : 0.0f;
+        scales[3] = (out_y_size_padded) > 1
+                        ? static_cast<float>(y_size_padded - 1) / static_cast<float>(out_y_size_padded - 1)
+                        : 0.0f;
+        scales[2] = (out_z_size_padded) > 1
+                        ? static_cast<float>(z_size_padded - 1) / static_cast<float>(out_z_size_padded - 1)
+                        : 0.0f;
     } else {
-        x_ratio = static_cast<float>(x_size_padded) / static_cast<float>(out_x_size_padded);
-        y_ratio = static_cast<float>(y_size_padded) / static_cast<float>(out_y_size_padded);
-        z_ratio = static_cast<float>(z_size_padded) / static_cast<float>(out_z_size_padded);
+        scales[0] = static_cast<float>(b_size_padded) / static_cast<float>(out_b_size_padded);
+        scales[1] = static_cast<float>(f_size_padded) / static_cast<float>(out_f_size_padded);
+        scales[4] = static_cast<float>(x_size_padded) / static_cast<float>(out_x_size_padded);
+        scales[3] = static_cast<float>(y_size_padded) / static_cast<float>(out_y_size_padded);
+        scales[2] = static_cast<float>(z_size_padded) / static_cast<float>(out_z_size_padded);
+    }
+    for (const auto& it : params.axesAndScales) {
+        int idx = getAxisIndex(it.first);
+        axesUsed[idx] = 1;
+        if (params.shapeCalculationMode == kernel_selector::ShapeCalculationMode::SCALES)
+            scales[idx] = 1.f / it.second;
+    }
+    for (size_t i = 0; i < scales.size(); ++i) {
+        if (scales[i] != 1.f)
+            axesUsed[i] = 1;
     }
 
     jit.AddConstants({
         MakeJitConstant(toString(params.resampleType), ""),
-        MakeJitConstant("X_RATIO", x_ratio),
-        MakeJitConstant("Y_RATIO", y_ratio),
-        MakeJitConstant("Z_RATIO", z_ratio),
-        MakeJitConstant("X_RATIO_HALF", x_ratio / 2.0f),
-        MakeJitConstant("Y_RATIO_HALF", y_ratio / 2.0f),
-        MakeJitConstant("Z_RATIO_HALF", z_ratio / 2.0f),
-        MakeJitConstant("PAD_BEGIN", pad_begin),
-        MakeJitConstant("PAD_END", pad_end),
+        MakeJitConstant(toString(params.nearestMode), ""),
+        MakeJitConstant(toString(params.coordTransMode), ""),
+        MakeJitConstant("SCALES", scales),
+        MakeJitConstant("PADS_BEGIN", pads_begin),
+        MakeJitConstant("PADS_END", pads_end),
+        MakeJitConstant("PADDING_USED", (int)paddingUsed),
+        MakeJitConstant("AXES_USED", axesUsed),
         MakeJitConstant("ALIGN_CORNERS", align_corners),
         MakeJitConstant("KERNEL_W", 2),
-        MakeJitConstant("ANTIALIAS", 0)
+        MakeJitConstant("ANTIALIAS", params.antialias),
+        MakeJitConstant("CUBE_COEFF", params.cube_coeff),
     });
 
     size_t feature_block_size = GetFeatureBlockSize(params);
@@ -149,7 +208,7 @@ JitConstants ResampleKernelBase::GetJitConstants(const resample_params& params)
         }
     }
 
-    if (params.resampleType == ResampleType::BILINEAR_INTERP) {
+    if (params.resampleType == ResampleType::BILINEAR_INTERP || params.resampleType == ResampleType::LINEAR_ONNX) {
         if (params.output.X().v % 32 != 0) {
             jit.AddConstant(MakeJitConstant("LEFTOVERS", 1));
         }
index f2a3c31..bc4e34a 100644 (file)
@@ -16,6 +16,8 @@
 
 #include "common_kernel_base.h"
 
+#include <map>
+
 namespace kernel_selector {
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // resample_params
@@ -23,10 +25,17 @@ namespace kernel_selector {
 struct resample_params : public base_params {
     resample_params() : base_params(KernelType::RESAMPLE) {}
 
-    uint32_t pad_begin = 0;
-    uint32_t pad_end = 0;
+    std::vector<int32_t> pads_begin = {};
+    std::vector<int32_t> pads_end = {};
     uint32_t align_corners = 0;
     ResampleType resampleType = ResampleType::NEAREST_NEIGHBOR;
+    CoordinateTransformationMode coordTransMode = CoordinateTransformationMode::HALF_PIXEL;
+    NearestMode nearestMode = NearestMode::ROUND_PREFER_FLOOR;
+    ShapeCalculationMode shapeCalculationMode = ShapeCalculationMode::SIZES;
+    uint32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    using AxesAndScales = std::map<InterpolateAxis, float>;
+    AxesAndScales axesAndScales;
 
     virtual ParamsKey GetParamsKey() const {
         auto k = base_params::GetParamsKey();
index d7d7484..90069a7 100644 (file)
@@ -40,6 +40,7 @@ ParamsKey ResampleKernelRef::GetSupportedKey() const {
     k.EnableReampleType(ResampleType::NEAREST_NEIGHBOR);
     k.EnableReampleType(ResampleType::CAFFE_BILINEAR_INTERP);
     k.EnableReampleType(ResampleType::BILINEAR_INTERP);
+    k.EnableReampleType(ResampleType::CUBIC);
     return k;
 }
 
index a870286..2824c19 100644 (file)
@@ -52,13 +52,13 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
 
     unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
 #ifdef SAMPLE_TYPE_NEAREST
-        const int ix = floor((x + out_x) * X_RATIO);
-        const int iy = floor(y * Y_RATIO);
+        const int ix = floor((x + out_x) * SCALES[4]);
+        const int iy = floor(y * SCALES[3]);
 
         in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iy, ix));
 #else
-        const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(X_RATIO) * (x + out_x);
-        const ACCUMULATOR_TYPE iy = TO_ACCUMULATOR_TYPE(Y_RATIO) * y;
+        const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(SCALES[4]) * (x + out_x);
+        const ACCUMULATOR_TYPE iy = TO_ACCUMULATOR_TYPE(SCALES[3]) * y;
 
         const int top_y_index    = (int)(floor(iy));
         const int bottom_y_index = min((int)ceil(iy), INPUT0_SIZE_Y - 1);
index a7372ed..f20ce3d 100644 (file)
@@ -38,6 +38,48 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint z, uint y, uint x)
 #endif
 }
 
+inline int FUNC(get_nearest_val)(float num, bool is_downsample)
+{
+#if defined(NEAREST_ROUND_PREFER_FLOOR)
+    return (num == (int)num + 0.5f) ? (int)floor(num) : (int)round(num);
+#elif defined(NEAREST_ROUND_PREFER_CEIL)
+    return (int)round(num);
+#elif defined(NEAREST_FLOOR)
+    return (int)floor(num);
+#elif defined(NEAREST_CEIL)
+    return (int)ceil(num);
+#elif defined(NEAREST_SIMPLE)
+    return is_downsample ? (int)ceil(num) : (int)num;
+#else
+#error [clDNN resample_ref.cl]: nearest mode - not supported
+#endif
+}
+
+inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
+{
+#if defined(COORD_TRANS_MODE_HALF_PIXEL)
+    return (num + 0.5f) * scale - 0.5f;
+#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
+    return (length_resized > 1) ? (num + 0.5f) * scale - 0.5f : 0.f;
+#elif defined(COORD_TRANS_MODE_ASYMMETRIC)
+    return num * scale;
+#elif defined(COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN)
+    return (num + 0.5f) * scale;
+#elif defined(COORD_TRANS_MODE_ALIGN_CORNERS)
+    return (length_resized != 1) ? num * (length_original - 1) / (length_resized - 1) : 0.f;
+#else
+#error [clDNN resample_ref.cl]: coordinate transformation mode - not supported
+#endif
+}
+
+inline void FUNC(get_cubic_coeff)(float* cubic_coef, float coord, float coef)
+{
+    float abs_num = fabs(coord);
+    cubic_coef[0] = coef * (abs_num - 1.0) * (abs_num - 1.0) * abs_num;
+    cubic_coef[1] = ((coef + 2.0) * abs_num - (coef + 3.0)) * abs_num * abs_num + 1.0;
+    cubic_coef[2] = (((-coef - 2.0) * abs_num + (2.0 * coef + 3.0)) * abs_num - coef) * abs_num;
+    cubic_coef[3] = -coef * abs_num * abs_num * (abs_num - 1.0);
+}
 
 #define TRIANGLE_COEFF(x) (ACCUMULATOR_MAX_FUNC(ACCUMULATOR_VAL_ZERO, ACCUMULATOR_VAL_ONE - ACCUMULATOR_ABS_FUNC(x)))
 #define unroll_for __attribute__((opencl_unroll_hint)) for
@@ -49,73 +91,223 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
 #endif
 )
 {
+    const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
+    const int out_size[5] = { OUTPUT_BATCH_NUM, OUTPUT_FEATURE_NUM, OUTPUT_SIZE_Z, OUTPUT_SIZE_Y, OUTPUT_SIZE_X };
 #if defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
     typedef MAKE_VECTOR_TYPE(INPUT0_TYPE, PACK_SIZE) in_pack_t;
     typedef MAKE_VECTOR_TYPE(OUTPUT_TYPE, PACK_SIZE) out_pack_t;
 
-    const int ox = get_global_id(0);
+    int out_coords[5];
+    out_coords[4] = get_global_id(0);
 #if OUTPUT_DIMS <= 4
-    const int oy = get_global_id(1);
-    const int oz = 0;
-#else
-    const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y;
-    const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+    out_coords[3] = get_global_id(1);
+    out_coords[2] = 0;
+#else // OUTPUT_DIMS <= 4
+    out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y;
+    out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+#endif //  OUTPUT_DIMS <= 4
+    out_coords[1] = ((int)get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM;
+    out_coords[0] = ((int)get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM;
+    int in_coords[5];
+    bool isOutOfBounds = false;
+    unroll_for (int i = 0; i < 5; ++i) {
+        const float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] +  PADS_END[i]);
+        const int nearest_pixel = FUNC_CALL(get_nearest_val)(orig_coord, SCALES[i] > 1) - PADS_BEGIN[i];
+        in_coords[i] = max(-PADS_BEGIN[0], min(nearest_pixel, in_size[i] + PADS_END[i] - 1));
+#if PADDING_USED == 1
+        if (in_coords[i] < 0 || in_coords[i] >= in_size[i])
+            isOutOfBounds = true;
 #endif
-    const int feature = ((int)get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM;
-    const int batch = ((int)get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM;
-    const int ix = floor(ox * X_RATIO);
-    const int iy = floor(oy * Y_RATIO);
-    const int iz = floor(oz * Z_RATIO);
+    }
 
-    uint input_idx = FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix);
-    uint output_idx = FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox);
+    uint input_idx = FUNC_CALL(get_input_index)(in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4]);
+    uint output_idx = FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4]);
 
     in_pack_t interp_val_pack = ((const __global in_pack_t*)(input + input_idx))[0];
     out_pack_t res;
     unroll_for (uint pi = 0; pi < PACK_SIZE; ++pi) {
         INPUT0_TYPE interp_val = interp_val_pack[pi];
+#if PADDING_USED == 1
+        if (isOutOfBounds)
+            interp_val = INPUT0_VAL_ZERO;
+#endif
     #if HAS_FUSED_OPS
-        #define OF_ID (feature + pi)
+        #define OF_ID (out_coords[1] + pi)
         FUSED_OPS;
         res[pi] = FUSED_OPS_RESULT;
-    #else
+    #else // HAS_FUSED_OPS
         res[pi] = ACTIVATION(interp_val, ACTIVATION_PARAMS);
-    #endif
+    #endif // HAS_FUSED_OPS
     }
     ((__global out_pack_t*)(output + output_idx))[0] = res;
 
-#elif defined(SAMPLE_TYPE_NEAREST)
-    const int ox = get_global_id(0);
+#elif defined(SAMPLE_TYPE_NEAREST) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+    int out_coords[5];
+    out_coords[4] = get_global_id(0);
 #if OUTPUT_DIMS <= 4
-    const int oy = get_global_id(1);
-    const int oz = 0;
-#else
-    const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y;
-    const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+    out_coords[3] = get_global_id(1);
+    out_coords[2] = 0;
+#else // OUTPUT_DIMS <= 4
+    out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y;
+    out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+#endif // OUTPUT_DIMS <= 4
+    out_coords[1] = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
+    out_coords[0] = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
+    int in_coords[5];
+    bool isOutOfBounds = false;
+    unroll_for (int i = 0; i < 5; ++i) {
+        const float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]);
+        int nearest_pixel = FUNC_CALL(get_nearest_val)(orig_coord, SCALES[i] > 1) - PADS_BEGIN[i];
+        in_coords[i] = max(-PADS_BEGIN[i], min(nearest_pixel, in_size[i] + PADS_END[i] - 1));
+#if PADDING_USED == 1
+        if (in_coords[i] < 0 || in_coords[i] >= in_size[i])
+            isOutOfBounds = true;
+#endif
+    }
+    INPUT0_TYPE interp_val = input[FUNC_CALL(get_input_index)(in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4])];
+#if PADDING_USED == 1
+    if (isOutOfBounds)
+        interp_val = INPUT0_VAL_ZERO;
 #endif
-    const int feature = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
-    const int batch = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
-    const int ix = floor(ox * X_RATIO);
-    const int iy = floor(oy * Y_RATIO);
-    const int iz = floor(oz * Z_RATIO);
-
-    INPUT0_TYPE interp_val = input[FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix)];
 #if HAS_FUSED_OPS
-    #define OF_ID (feature)
+    #define OF_ID (out_coords[1])
     FUSED_OPS;
     OUTPUT_TYPE res = FUSED_OPS_RESULT;
-#else
+#else // HAS_FUSED_OPS
     OUTPUT_TYPE res = ACTIVATION(interp_val, ACTIVATION_PARAMS);
+#endif // HAS_FUSED_OPS
+    output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res;
+#elif defined(SAMPLE_TYPE_CUBIC) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+    int out_coords[5];
+    out_coords[4] = get_global_id(0);
+#if OUTPUT_DIMS <= 4
+    out_coords[3] = get_global_id(1);
+    out_coords[2] = 0;
+#else // OUTPUT_DIMS <= 4
+    out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y;
+    out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+#endif // OUTPUT_DIMS <= 4
+    out_coords[1] = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
+    out_coords[0] = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
+    int in_coords[5];
+    float cubic_coeff[5][4];
+    unroll_for (int i = 0; i < 5; ++i) {
+        float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]) - PADS_BEGIN[i];
+        in_coords[i] = floor(orig_coord);
+        orig_coord = (orig_coord - in_coords[i]) * AXES_USED[i];
+        FUNC_CALL(get_cubic_coeff)(cubic_coeff[i], orig_coord, CUBE_COEFF);
+    }
+
+    INPUT0_TYPE interp_val = INPUT0_VAL_ZERO;
+    int index[5];
+    unroll_for (index[0] = 0; index[0] <= 3; ++index[0]) {
+        unroll_for (index[1] = 0; index[1] <= 3; ++index[1]) {
+            unroll_for (index[2] = 0; index[2] <= 3; ++index[2]) {
+                unroll_for (index[3] = 0; index[3] <= 3; ++index[3]) {
+                    unroll_for (index[4] = 0; index[4] <= 3; ++index[4]) {
+                        int coords_sum[5] = { in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4] };
+                        float coeff_prod = 1.0f;
+                        bool isOutOfBounds = false;
+                        unroll_for (int i = 0; i < 5; ++i) {
+                            coords_sum[i] = max(-PADS_BEGIN[i], min(in_coords[i] + index[i] - 1, PADS_END[i] + in_size[i] - 1));
+#if PADDING_USED == 1
+                            if (coords_sum[i] < 0 || coords_sum[i] >= in_size[i])
+                                isOutOfBounds = true;
+#endif
+                            coeff_prod *= cubic_coeff[i][index[i]];
+                        }
+#if PADDING_USED == 1
+                        if (!isOutOfBounds)
+#endif
+                            interp_val += coeff_prod * input[FUNC_CALL(get_input_index)(coords_sum[0], coords_sum[1], coords_sum[2], coords_sum[3], coords_sum[4])];
+                    }
+                }
+            }
+        }
+    }
+
+#if HAS_FUSED_OPS
+    #define OF_ID (out_coords[1])
+    FUSED_OPS;
+    OUTPUT_TYPE res = FUSED_OPS_RESULT;
+#else // HAS_FUSED_OPS
+    OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
+#endif // HAS_FUSED_OPS
+    output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res;
+#elif defined(SAMPLE_TYPE_LINEAR_ONNX) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+    const int ox = get_global_id(0);
+    const int oy = get_global_id(1);
+    const int feature = 0;
+    const int batch = get_global_id(2);
+    const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3];
+    const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4];
+    const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X);
+    const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y);
+
+#ifdef LEFTOVERS
+    if (ox >= OUTPUT_SIZE_X)
+        return;
 #endif
-    output[FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox)] = res;
 
-#elif defined(SAMPLE_TYPE_INTERP)
+    float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
+    float in_x = fmax(0, fmin(ix, PADDED_X - 1));
+    int in_y1 = min((int)in_y, PADDED_Y - 1);
+    int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
+    int in_x1 = min((int)in_x, PADDED_X - 1);
+    int in_x2 = min(in_x1 + 1, PADDED_X - 1);
+
+    const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
+    const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
+    const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
+    const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
+#if PADDING_USED == 1
+    in_y1 -= PADS_BEGIN[3];
+    in_y2 -= PADS_BEGIN[3];
+    in_x1 -= PADS_BEGIN[4];
+    in_x2 -= PADS_BEGIN[4];
+
+    bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
+    bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
+    bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
+    bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
+#endif
+    unroll_for(int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) {
+        INPUT0_TYPE top_left = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x1)];
+        INPUT0_TYPE top_right = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x2)];
+        INPUT0_TYPE bottom_left = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x1)];
+        INPUT0_TYPE bottom_right = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x2)];
+#if PADDING_USED == 1
+        if (tlOutOfBounds)
+            top_left = INPUT0_VAL_ZERO;
+        if (trOutOfBounds)
+            top_right = INPUT0_VAL_ZERO;
+        if (blOutOfBounds)
+            bottom_left = INPUT0_VAL_ZERO;
+        if (brOutOfBounds)
+            bottom_right = INPUT0_VAL_ZERO;
+#endif
+
+        ACCUMULATOR_TYPE interp_val = TO_ACCUMULATOR_TYPE(dx2 * dy2 * top_left) +
+                                      TO_ACCUMULATOR_TYPE(dx1 * dy2 * top_right) +
+                                      TO_ACCUMULATOR_TYPE(dx2 * dy1 * bottom_left) +
+                                      TO_ACCUMULATOR_TYPE(dx1 * dy1 * bottom_right);
+
+#if HAS_FUSED_OPS
+        #define OF_ID (in_f)
+        FUSED_OPS;
+        OUTPUT_TYPE res = FUSED_OPS_RESULT;
+#else
+        OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
+#endif
+        output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res;
+    }
+#elif defined(SAMPLE_TYPE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
     const int ox = get_global_id(0);
     const int oy = get_global_id(1);
     const int feature = 0;
     const int batch = get_global_id(2);
-    const float ix = X_RATIO * ox;
-    const float iy = Y_RATIO * oy;
+    const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], OUTPUT_SIZE_X, in_size[4]);
+    const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], OUTPUT_SIZE_Y, in_size[3]);
 
 #ifdef LEFTOVERS
     if (ox >= OUTPUT_SIZE_X)
@@ -123,9 +315,9 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
 #endif
 
     const int top_y_index    = (int)(floor(iy));
-    const int bottom_y_index = min((int)ceil(iy), INPUT0_SIZE_Y - 1);
+    const int bottom_y_index = min((int)ceil(iy), in_size[3] - 1);
     const int left_x_index   = (int)(floor(ix));
-    const int right_x_index  = min((int)ceil(ix), INPUT0_SIZE_X - 1);
+    const int right_x_index  = min((int)ceil(ix), in_size[4] - 1);
 
     const ACCUMULATOR_TYPE dx = TO_ACCUMULATOR_TYPE(ix - left_x_index);
     const ACCUMULATOR_TYPE dy = TO_ACCUMULATOR_TYPE(iy - top_y_index);
@@ -146,11 +338,11 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
         FUSED_OPS;
         OUTPUT_TYPE res = FUSED_OPS_RESULT;
 #else
-        OUTPUT_TYPE res = TO_OUTPUT_TYPE(ACTIVATION(interp_val, ACTIVATION_PARAMS));
+        OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
 #endif
         output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res;
     }
-#elif defined(SAMPLE_TYPE_CAFFE_INTERP)
+#elif defined(SAMPLE_TYPE_CAFFE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
     const int ox = (int)get_global_id(0) % OUTPUT_SIZE_X;
     const int oy = (int)get_global_id(0) / OUTPUT_SIZE_X;
     const int feature_block_nun = get_global_id(1);
@@ -162,84 +354,123 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
     const int batch = (int)get_global_id(2) % OUTPUT_BATCH_NUM;
     const int oz    = (int)get_global_id(2) / OUTPUT_BATCH_NUM;
 #endif
+    const int PADDED_B = in_size[0] + PADS_BEGIN[0] + PADS_END[0];
+    const int PADDED_F = in_size[1] + PADS_BEGIN[1] + PADS_END[1];
+    const int PADDED_Z = in_size[2] + PADS_BEGIN[2] + PADS_END[2];
+    const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3];
+    const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4];
 
-    const ACCUMULATOR_TYPE ix = ox * X_RATIO + X_RATIO_HALF - 0.5f;
-    const ACCUMULATOR_TYPE iy = oy * Y_RATIO + Y_RATIO_HALF - 0.5f;
-    const ACCUMULATOR_TYPE iz = oz * Z_RATIO + Z_RATIO_HALF - 0.5f;
+    ACCUMULATOR_TYPE i_b = AXES_USED[0] ? FUNC_CALL(get_original_coordinate)(batch, SCALES[0], out_size[0], PADDED_B) : batch;
+    ACCUMULATOR_TYPE i_f = AXES_USED[1] ? FUNC_CALL(get_original_coordinate)(feature, SCALES[1], out_size[1], PADDED_F) : feature;
+    ACCUMULATOR_TYPE i_x = AXES_USED[4] ? FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X) : ox;
+    ACCUMULATOR_TYPE i_y = AXES_USED[3] ? FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y) : oy;
+    ACCUMULATOR_TYPE i_z = AXES_USED[2] ? FUNC_CALL(get_original_coordinate)(oz, SCALES[2], out_size[2], PADDED_Z) : oz;
+#if PADDING_USED == 1
+    i_b -= PADS_BEGIN[0];
+    i_f -= PADS_BEGIN[1];
+    i_z -= PADS_BEGIN[2];
+    i_y -= PADS_BEGIN[3];
+    i_x -= PADS_BEGIN[4];
+#endif
 
-    const int ix_r = (int)ix;
-    const int iy_r = (int)iy;
-    const int iz_r = (int)iz;
+    const int ib_r = (int)i_b;
+    const int if_r = (int)i_f;
+    const int ix_r = (int)i_x;
+    const int iy_r = (int)i_y;
+    const int iz_r = (int)i_z;
 
 #if ANTIALIAS == 1
-    const ACCUMULATOR_TYPE ax = 1.0f / X_RATIO;
-    const ACCUMULATOR_TYPE ay = 1.0f / Y_RATIO;
-    const ACCUMULATOR_TYPE az = 1.0f / Z_RATIO;
+    const ACCUMULATOR_TYPE ab = 1.0f / SCALES[0];
+    const ACCUMULATOR_TYPE af = 1.0f / SCALES[1];
+    const ACCUMULATOR_TYPE ax = 1.0f / SCALES[4];
+    const ACCUMULATOR_TYPE ay = 1.0f / SCALES[3];
+    const ACCUMULATOR_TYPE az = 1.0f / SCALES[2];
 #else
+    const ACCUMULATOR_TYPE ab = 1.0f;
+    const ACCUMULATOR_TYPE af = 1.0f;
     const ACCUMULATOR_TYPE ax = 1.0f;
     const ACCUMULATOR_TYPE ay = 1.0f;
     const ACCUMULATOR_TYPE az = 1.0f;
 #endif
-    const int rx = (X_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ax);
-    const int ry = (Y_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ay);
-    const int rz = (Z_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / az);
-
-    ACCUMULATOR_TYPE sum[FEATURE_BLOCK_SIZE];
-    for (int i = 0; i < FEATURE_BLOCK_SIZE; i++)
-        sum[i] = 0;
-
-    ACCUMULATOR_TYPE wsum = 0;
-
-    int const y_init = max(0, iy_r - ry);
-    int const x_init = max(0, ix_r - rx);
-    int const z_init = max(0, iz_r - rz);
-    int const y_max = min(INPUT0_SIZE_Y, iy_r + ry + 1);
-    int const x_max = min(INPUT0_SIZE_X, ix_r + rx + 1);
-    int const z_max = min(INPUT0_SIZE_Z, iz_r + rz + 1);
-
-    unroll_for(int z = z_init; z < z_max; z++) {
-        unroll_for(int y = y_init; y < y_max; y++) {
-            unroll_for(int x = x_init; x < x_max; x++) {
-                ACCUMULATOR_TYPE dx = ix - x;
-                ACCUMULATOR_TYPE dy = iy - y;
-                ACCUMULATOR_TYPE dz = iz - z;
-#if ANTIALIAS == 1
-                ACCUMULATOR_TYPE w = ax * TRIANGLE_COEFF(ax * dx) * ay * TRIANGLE_COEFF(ay * dy) * az * triangleCoeff(az * dz);
+    const int rb = (SCALES[0] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ab);
+    const int rf = (SCALES[1] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / af);
+    const int rx = (SCALES[4] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ax);
+    const int ry = (SCALES[3] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ay);
+    const int rz = (SCALES[2] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / az);
+
+    int const b_init = max(-PADS_BEGIN[0], ib_r - rb);
+    int const f_init = max(-PADS_BEGIN[1], if_r - rf);
+    int const y_init = max(-PADS_BEGIN[3], iy_r - ry);
+    int const x_init = max(-PADS_BEGIN[4], ix_r - rx);
+    int const z_init = max(-PADS_BEGIN[2], iz_r - rz);
+    int const b_max = min(PADS_END[0] + INPUT0_BATCH_NUM, ib_r + rb + 1);
+    int const f_max = min(PADS_END[1] + INPUT0_FEATURE_NUM, if_r + rf + 1);
+    int const y_max = min(PADS_END[3] + INPUT0_SIZE_Y, iy_r + ry + 1);
+    int const x_max = min(PADS_END[4] + INPUT0_SIZE_X, ix_r + rx + 1);
+    int const z_max = min(PADS_END[2] + INPUT0_SIZE_Z, iz_r + rz + 1);
+#ifndef LEFTOVERS
+    const int fp_max = FEATURE_BLOCK_SIZE;
 #else
-                ACCUMULATOR_TYPE w = TRIANGLE_COEFF(dx) * TRIANGLE_COEFF(dy) * TRIANGLE_COEFF(dz);
+    const int fp_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER);
 #endif
+    ACCUMULATOR_TYPE sum[fp_max] = {0};
+    ACCUMULATOR_TYPE wsum[fp_max] = {0};
 
-#ifndef LEFTOVERS
-                unroll_for(int f = 0; f < FEATURE_BLOCK_SIZE; f++) {
+    unroll_for(int b = b_init; b < b_max; b++) {
+        unroll_for(int f = f_init; f < f_max; f++) {
+            unroll_for(int z = z_init; z < z_max; z++) {
+                unroll_for(int y = y_init; y < y_max; y++) {
+                    unroll_for(int x = x_init; x < x_max; x++) {
+                        unroll_for(int fp = 0; fp < fp_max; fp++) {
+#if PADDING_USED == 1
+                            bool isOutOfBounds = b < 0 || f < 0 || z < 0 || y < 0 || x < 0 ||
+                                                 b >= in_size[0] || f >= in_size[1] || z >= in_size[2] ||
+                                                 y >= in_size[3] || x >= in_size[4];
+#endif
+
+                            ACCUMULATOR_TYPE db = i_b - b;
+                            ACCUMULATOR_TYPE df = i_f - f;
+                            ACCUMULATOR_TYPE dx = i_x - x;
+                            ACCUMULATOR_TYPE dy = i_y - y;
+                            ACCUMULATOR_TYPE dz = i_z - z;
+#if ANTIALIAS == 1
+                            ACCUMULATOR_TYPE w = ab * TRIANGLE_COEFF(ab * db) *
+                                                 af * TRIANGLE_COEFF(af * df) *
+                                                 ax * TRIANGLE_COEFF(ax * dx) *
+                                                 ay * TRIANGLE_COEFF(ay * dy) *
+                                                 az * TRIANGLE_COEFF(az * dz);
 #else
-                const int f_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER);
-                unroll_for(int f = 0; f < f_max; f++) {
+                            ACCUMULATOR_TYPE w = TRIANGLE_COEFF(db) *
+                                                 TRIANGLE_COEFF(df) *
+                                                 TRIANGLE_COEFF(dx) *
+                                                 TRIANGLE_COEFF(dy) *
+                                                 TRIANGLE_COEFF(dz);
 #endif
-                if (w != 0)
-                    sum[f] += w * TO_ACCUMULATOR_TYPE(input[FUNC_CALL(get_input_index)(batch, feature + f, z, y, x)]);
+                            if (w != 0 && f + fp < INPUT0_FEATURE_NUM) {
+                                wsum[fp] += w;
+#if PADDING_USED == 1
+                                if (!isOutOfBounds)
+#endif
+                                    sum[fp] += w * TO_ACCUMULATOR_TYPE(input[FUNC_CALL(get_input_index)(b, f + fp, z, y, x)]);
+                            }
+                        }
+                    }
                 }
-                wsum += w;
             }
         }
     }
-#ifndef LEFTOVERS
-    unroll_for (int f = 0; f < FEATURE_BLOCK_SIZE; f++) {
-#else
-    const int f_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER);
-    unroll_for (int f = 0; f < f_max; f++) {
-#endif
-
-        ACCUMULATOR_TYPE interp_val = (wsum == 0) ? 0 : (sum[f] / wsum);
+    unroll_for (int f = 0; f < fp_max; f++) {
+        ACCUMULATOR_TYPE interp_val = (wsum[f] == 0) ? ACCUMULATOR_VAL_ZERO : (sum[f] / wsum[f]);
 #if HAS_FUSED_OPS
         #define OF_ID (feature + f)
         FUSED_OPS;
         OUTPUT_TYPE res = FUSED_OPS_RESULT;
 #else
-        OUTPUT_TYPE res = TO_OUTPUT_TYPE(ACTIVATION(interp_val, ACTIVATION_PARAMS));
+        OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
 #endif
         output[FUNC_CALL(get_output_index)(batch, feature + f, oz, oy, ox)] = res;
     }
-#endif
+#endif // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
 }
 
 #undef unroll_for
index e9f97ae..95bac97 100644 (file)
@@ -429,6 +429,30 @@ std::string toString(ResampleType type) {
         case ResampleType::NEAREST_NEIGHBOR:  return "SAMPLE_TYPE_NEAREST";
         case ResampleType::BILINEAR_INTERP: return "SAMPLE_TYPE_INTERP";
         case ResampleType::CAFFE_BILINEAR_INTERP: return "SAMPLE_TYPE_CAFFE_INTERP";
+        case ResampleType::CUBIC: return "SAMPLE_TYPE_CUBIC";
+        case ResampleType::LINEAR_ONNX: return "SAMPLE_TYPE_LINEAR_ONNX";
+        default: return "";
+    }
+}
+
+std::string toString(CoordinateTransformationMode mode) {
+    switch (mode) {
+        case CoordinateTransformationMode::HALF_PIXEL:  return "COORD_TRANS_MODE_HALF_PIXEL";
+        case CoordinateTransformationMode::PYTORCH_HALF_PIXEL: return "COORD_TRANS_MODE_PYTORCH_HALF_PIXEL";
+        case CoordinateTransformationMode::ASYMMETRIC: return "COORD_TRANS_MODE_ASYMMETRIC";
+        case CoordinateTransformationMode::TF_HALF_PIXEL_FOR_NN: return "COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN";
+        case CoordinateTransformationMode::ALIGN_CORNERS: return "COORD_TRANS_MODE_ALIGN_CORNERS";
+        default: return "";
+    }
+}
+
+std::string toString(NearestMode mode) {
+    switch (mode) {
+        case NearestMode::ROUND_PREFER_FLOOR:  return "NEAREST_ROUND_PREFER_FLOOR";
+        case NearestMode::ROUND_PREFER_CEIL: return "NEAREST_ROUND_PREFER_CEIL";
+        case NearestMode::FLOOR: return "NEAREST_FLOOR";
+        case NearestMode::CEIL: return "NEAREST_CEIL";
+        case NearestMode::SIMPLE: return "NEAREST_SIMPLE";
         default: return "";
     }
 }
index 8c97e0e..ca781c3 100644 (file)
@@ -251,6 +251,8 @@ std::string toString(TileAxis a);
 std::string toString(GatherAxis a);
 std::string toString(ScatterUpdateAxis a);
 std::string toString(ResampleType type);
+std::string toString(CoordinateTransformationMode mode);
+std::string toString(NearestMode mode);
 std::string toString(const BorderType type);
 std::string toString(const Tensor::Dim& dim);
 std::string toString(const DataTensor& tensor);
index 7409bd4..be26f9c 100644 (file)
@@ -34,10 +34,78 @@ inline kernel_selector::sample_type convert_to_sample_type(resample_type type) {
             return kernel_selector::sample_type::CAFFE_BILINEAR_INTERP;
         case resample_type::bilinear:
             return kernel_selector::sample_type::BILINEAR_INTERP;
+        case resample_type::cubic:
+            return kernel_selector::sample_type::CUBIC;
+        case resample_type::linear_onnx:
+            return kernel_selector::sample_type::LINEAR_ONNX;
         default:
             return kernel_selector::sample_type::NEAREST_NEIGHBOR;
     }
 }
+
+inline kernel_selector::coordinate_transformation_mode convert_to_coord_transform_mode(coordinate_transformation_mode mode) {
+    switch (mode) {
+        case coordinate_transformation_mode::half_pixel:
+            return kernel_selector::coordinate_transformation_mode::HALF_PIXEL;
+        case coordinate_transformation_mode::pytorch_half_pixel:
+            return kernel_selector::coordinate_transformation_mode::PYTORCH_HALF_PIXEL;
+        case coordinate_transformation_mode::asymmetric:
+            return kernel_selector::coordinate_transformation_mode::ASYMMETRIC;
+        case coordinate_transformation_mode::tf_half_pixel_for_nn:
+            return kernel_selector::coordinate_transformation_mode::TF_HALF_PIXEL_FOR_NN;
+        case coordinate_transformation_mode::align_corners:
+            return kernel_selector::coordinate_transformation_mode::ALIGN_CORNERS;
+        default:
+            return kernel_selector::coordinate_transformation_mode::HALF_PIXEL;
+    }
+}
+
+inline kernel_selector::nearest_mode convert_to_nearest_mode(nearest_mode mode) {
+    switch (mode) {
+        case nearest_mode::round_prefer_floor:
+            return kernel_selector::nearest_mode::ROUND_PREFER_FLOOR;
+        case nearest_mode::round_prefer_ceil:
+            return kernel_selector::nearest_mode::ROUND_PREFER_CEIL;
+        case nearest_mode::floor:
+            return kernel_selector::nearest_mode::FLOOR;
+        case nearest_mode::ceil:
+            return kernel_selector::nearest_mode::CEIL;
+        case nearest_mode::simple:
+            return kernel_selector::nearest_mode::SIMPLE;
+        default:
+            return kernel_selector::nearest_mode::ROUND_PREFER_FLOOR;
+    }
+}
+
+inline kernel_selector::shape_calculation_mode convert_to_shape_calculation_mode(shape_calculation_mode mode) {
+    switch (mode) {
+        case shape_calculation_mode::sizes:
+            return kernel_selector::shape_calculation_mode::SIZES;
+        case shape_calculation_mode::scales:
+            return kernel_selector::shape_calculation_mode::SCALES;
+        default:
+            return kernel_selector::shape_calculation_mode::SIZES;
+    }
+}
+
+inline kernel_selector::interpolate_axis convert_axis(resample::resample_axis axis) {
+    switch (axis) {
+        case resample::along_x:
+            return kernel_selector::interpolate_axis::X;
+        case resample::along_y:
+            return kernel_selector::interpolate_axis::Y;
+        case resample::along_z:
+            return kernel_selector::interpolate_axis::Z;
+        case resample::along_w:
+            return kernel_selector::interpolate_axis::W;
+        case resample::along_f:
+            return kernel_selector::interpolate_axis::FEATURE;
+        case resample::along_b:
+            return kernel_selector::interpolate_axis::BATCH;
+        default:
+            return kernel_selector::interpolate_axis::BATCH;
+    }
+}
 }  // namespace
 
 struct resample_gpu : typed_primitive_gpu_impl<resample> {
@@ -53,11 +121,20 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
         if (primitive->with_activation)
             convert_activation_func_params(primitive, us_params.activations);
 
+        size_t dimsNum = arg.get_output_layout().format.dimension();
         us_params.resampleType = convert_to_sample_type(primitive->operation_type);
+        us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode);
+        us_params.coordTransMode = convert_to_coord_transform_mode(primitive->coord_trans_mode);
+        us_params.shapeCalculationMode = convert_to_shape_calculation_mode(primitive->shape_calc_mode);
+        us_params.antialias = primitive->antialias;
+        us_params.cube_coeff = primitive->cube_coeff;
+        us_params.pads_begin = primitive->pads_begin.empty() ? std::vector<int32_t>(dimsNum, 0) : primitive->pads_begin;
+        us_params.pads_end = primitive->pads_end.empty() ? std::vector<int32_t>(dimsNum, 0) : primitive->pads_end;
+        for (const auto& it : primitive->axesAndScales) {
+            us_params.axesAndScales[convert_axis(it.first)] = it.second;
+        }
 
         if (primitive->operation_type == resample_type::bilinear) {
-            us_params.pad_begin = primitive->pad_begin;
-            us_params.pad_end = primitive->pad_end;
             us_params.align_corners = primitive->align_corners;
         }
 
index d4fa358..fc1a657 100644 (file)
@@ -76,6 +76,10 @@ using concat_axis = kernel_selector::ConcatAxis;
 using tile_axis = kernel_selector::TileAxis;
 using tuning_mode = kernel_selector::TuningMode;
 using sample_type = kernel_selector::ResampleType;
+using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode;
+using nearest_mode = kernel_selector::NearestMode;
+using shape_calculation_mode = kernel_selector::ShapeCalculationMode;
+using interpolate_axis = kernel_selector::InterpolateAxis;
 using border_type = kernel_selector::BorderType;
 using gather_axis = kernel_selector::GatherAxis;
 using scatter_update_axis = kernel_selector::ScatterUpdateAxis;
index ac33f7c..f626120 100644 (file)
@@ -62,17 +62,67 @@ std::string resample_inst::to_string(resample_node const& node) {
         resample_info.add("resample_type:", "bilinear_interp");
     else if (desc->operation_type == resample_type::caffe_bilinear)
         resample_info.add("resample_type:", "caffe_bilinear_interp");
+    else if (desc->operation_type == resample_type::cubic)
+        resample_info.add("resample_type:", "cubic");
     else
         resample_info.add("resample_type:", "not supported sample type");
 
+    if (desc->shape_calc_mode == shape_calculation_mode::sizes)
+        resample_info.add("shape_calculation_mode:", "sizes");
+    else
+        resample_info.add("shape_calculation_mode:", "scales");
+
+    if (desc->shape_calc_mode == shape_calculation_mode::scales) {
+        std::string axesAndScalesDump;
+        std::string delim = "";
+        for (auto& it : desc->axesAndScales) {
+            axesAndScalesDump += delim;
+            delim = ", ";
+            if (it.first == resample::resample_axis::along_b)
+                axesAndScalesDump += "b: ";
+            else if (it.first == resample::resample_axis::along_f)
+                axesAndScalesDump += "f: ";
+            else if (it.first == resample::resample_axis::along_x)
+                axesAndScalesDump += "x: ";
+            else if (it.first == resample::resample_axis::along_y)
+                axesAndScalesDump += "y: ";
+            else if (it.first == resample::resample_axis::along_z)
+                axesAndScalesDump += "z: ";
+            else
+                axesAndScalesDump += "w: ";
+            axesAndScalesDump += std::to_string(it.second);
+        }
+        resample_info.add("scales:", axesAndScalesDump);
+    }
+
+    if (desc->coord_trans_mode == coordinate_transformation_mode::half_pixel)
+        resample_info.add("coordinate_transformation_mode:", "half_pixel");
+    else if (desc->coord_trans_mode == coordinate_transformation_mode::pytorch_half_pixel)
+        resample_info.add("coordinate_transformation_mode:", "pytorch_half_pixel");
+    else if (desc->coord_trans_mode == coordinate_transformation_mode::tf_half_pixel_for_nn)
+        resample_info.add("coordinate_transformation_mode:", "tf_half_pixel_for_nn");
+    else if (desc->coord_trans_mode == coordinate_transformation_mode::align_corners)
+        resample_info.add("coordinate_transformation_mode:", "align_corners");
+    else
+        resample_info.add("coordinate_transformation_mode:", "asymmetric");
+
+    if (desc->round_mode == nearest_mode::round_prefer_floor)
+        resample_info.add("nearest_mode:", "round_prefer_floor");
+    if (desc->round_mode == nearest_mode::round_prefer_ceil)
+        resample_info.add("nearest_mode:", "round_prefer_ceil");
+    if (desc->round_mode == nearest_mode::floor)
+        resample_info.add("nearest_mode:", "floor");
+    if (desc->round_mode == nearest_mode::ceil)
+        resample_info.add("nearest_mode:", "ceil");
+    else
+        resample_info.add("nearest_mode:", "simple");
+
     resample_info.add("output_size", desc->output_size);
     resample_info.add("with activation", desc->with_activation);
     resample_info.add("output padding lower size", desc->output_padding.lower_size());
     resample_info.add("output padding upper size", desc->output_padding.upper_size());
 
     if (desc->operation_type == resample_type::bilinear) {
-        resample_info.add("pad_begin", desc->pad_begin);
-        resample_info.add("pad_end", desc->pad_end);
         resample_info.add("align_corners", desc->align_corners);
     }
 
index f6c6dd4..cad7778 100644 (file)
@@ -91,10 +91,10 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
         12.f,12.f, 9.f,   9.f,  -17.f,  -17.f,
     };
 
-    for (int i = 0; i < 2; ++i) { //B
-        for (int j = 0; j < 2; ++j) { //F
-            for (int k = 0; k < 4; ++k) { //Y
-                for (int l = 0; l < 6; ++l) { //X
+    for (int i = 0; i < 2; ++i) { // B
+        for (int j = 0; j < 2; ++j) { // F
+            for (int k = 0; k < 4; ++k) { // Y
+                for (int l = 0; l < 6; ++l) { // X
                     auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
                     EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id]));
                 }
@@ -146,8 +146,8 @@ TEST(resample_gpu, basic_in2x3x2x2_bilinear) {
         3.f, 3.25f, 3.75f, 4.f,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 4; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 4; ++l) { // X
             auto linear_id = l + k * 4;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
         }
@@ -172,7 +172,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(resample("upsampling", "input", output_size, 0, 0, 0, resample_type::bilinear));
+    topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear));
 
     set_values(input, {
         1.f, 2.f,
@@ -196,8 +196,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp) {
         3.0f, 3.5f, 4.0f, 4.0f,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 4; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 4; ++l) { // X
             auto linear_id = l + k * 4;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
         }
@@ -223,7 +223,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_f16) {
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
     topology.add(reorder("input_to_b_fs_yx_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32));
-    topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, 0, 0, 0, resample_type::bilinear));
+    topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear));
     topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32));
 
     set_values(input, {
@@ -254,8 +254,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_f16) {
         3.0f, 3.5f, 4.0f, 4.0f,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 4; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 4; ++l) { // X
             auto linear_id = l + k * 4;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
         }
@@ -281,7 +281,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_fsv32) {
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
     topology.add(reorder("input_to_fs_b_yx_fsv32", "input", format::fs_b_yx_fsv32, data_types::f16));
-    topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, 0, 0, 0, resample_type::bilinear));
+    topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear));
     topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32));
 
     set_values(input, {
@@ -312,8 +312,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_fsv32) {
         3.0f, 3.5f, 4.0f, 4.0f,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 4; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 4; ++l) { // X
             auto linear_id = l + k * 4;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
         }
@@ -339,7 +339,7 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_align_1) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(resample("upsampling", "input", output_size, 0, 0, 1, resample_type::bilinear));
+    topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 1, resample_type::bilinear));
 
     set_values(input, {
             1.f, 2.f,
@@ -363,8 +363,8 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_align_1) {
             3.000000f, 3.333333f, 3.666667f, 4.000000f
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 4; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 4; ++l) { // X
             auto linear_id = l + k * 4;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
         }
@@ -414,8 +414,8 @@ TEST(resample_gpu, nearest_asymmetric) {
         3.f, 3.f, 3.f, 4.f, 4.f,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 5; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 5; ++l) { // X
             auto linear_id = l + k * 5;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
         }
@@ -465,8 +465,8 @@ TEST(resample_gpu, nearest_asymmetric_i8) {
             3, 3, 3, 4, 4,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 5; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 5; ++l) { // X
             auto linear_id = l + k * 5;
             EXPECT_EQ(answers[linear_id], output_ptr[linear_id]);
         }
@@ -516,8 +516,8 @@ TEST(resample_gpu, bilinear_asymmetric) {
         3.f, 3.f, 3.33333f, 3.66667f, 4.f, 4.f,
     };
 
-    for (int k = 0; k < 4; ++k) { //Y
-        for (int l = 0; l < 6; ++l) { //X
+    for (int k = 0; k < 4; ++k) { // Y
+        for (int l = 0; l < 6; ++l) { // X
             auto linear_id = l + k * 6;
             EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 5e-03F) << l << " " << k;
         }
@@ -776,3 +776,1082 @@ INSTANTIATE_TEST_CASE_P(smoke,
                             .smoke_params(data_types::i8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
                             .smoke_params(data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
                         ), );
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+    nearest_mode nm = nearest_mode::ceil;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    float answers[96] = {
+         0.f,  1.f,  1.f,  1.f,
+         2.f,  3.f,  3.f,  3.f,
+         2.f,  3.f,  3.f,  3.f,
+         4.f,  5.f,  5.f,  5.f,
+         4.f,  5.f,  5.f,  5.f,
+         4.f,  5.f,  5.f,  5.f,
+
+         6.f,  7.f,  7.f,  7.f,
+         8.f,  9.f,  9.f,  9.f,
+         8.f,  9.f,  9.f,  9.f,
+        10.f, 11.f, 11.f, 11.f,
+        10.f, 11.f, 11.f, 11.f,
+        10.f, 11.f, 11.f, 11.f,
+
+        12.f, 13.f, 13.f, 13.f,
+        14.f, 15.f, 15.f, 15.f,
+        14.f, 15.f, 15.f, 15.f,
+        16.f, 17.f, 17.f, 17.f,
+        16.f, 17.f, 17.f, 17.f,
+        16.f, 17.f, 17.f, 17.f,
+
+        18.f, 19.f, 19.f, 19.f,
+        20.f, 21.f, 21.f, 21.f,
+        20.f, 21.f, 21.f, 21.f,
+        22.f, 23.f, 23.f, 23.f,
+        22.f, 23.f, 23.f, 23.f,
+        22.f, 23.f, 23.f, 23.f,
+    };
+
+    for (int i = 0; i < 2; ++i) { // B
+        for (int j = 0; j < 2; ++j) { // F
+            for (int k = 0; k < 4; ++k) { // Y
+                for (int l = 0; l < 6; ++l) { // X
+                    auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+                    EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+                }
+            }
+        }
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+    nearest_mode nm = nearest_mode::round_prefer_floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    float answers[96] = {
+         0.f,  0.f,  1.f,  1.f,
+         0.f,  0.f,  1.f,  1.f,
+         2.f,  2.f,  3.f,  3.f,
+         2.f,  2.f,  3.f,  3.f,
+         4.f,  4.f,  5.f,  5.f,
+         4.f,  4.f,  5.f,  5.f,
+
+         6.f,  6.f,  7.f,  7.f,
+         6.f,  6.f,  7.f,  7.f,
+         8.f,  8.f,  9.f,  9.f,
+         8.f,  8.f,  9.f,  9.f,
+        10.f, 10.f, 11.f, 11.f,
+        10.f, 10.f, 11.f, 11.f,
+
+        12.f, 12.f, 13.f, 13.f,
+        12.f, 12.f, 13.f, 13.f,
+        14.f, 14.f, 15.f, 15.f,
+        14.f, 14.f, 15.f, 15.f,
+        16.f, 16.f, 17.f, 17.f,
+        16.f, 16.f, 17.f, 17.f,
+
+        18.f, 18.f, 19.f, 19.f,
+        18.f, 18.f, 19.f, 19.f,
+        20.f, 20.f, 21.f, 21.f,
+        20.f, 20.f, 21.f, 21.f,
+        22.f, 22.f, 23.f, 23.f,
+        22.f, 22.f, 23.f, 23.f,
+    };
+
+    for (int i = 0; i < 2; ++i) { // B
+        for (int j = 0; j < 2; ++j) { // F
+            for (int k = 0; k < 4; ++k) { // Y
+                for (int l = 0; l < 6; ++l) { // X
+                    auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+                    EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+                }
+            }
+        }
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+    nearest_mode nm = nearest_mode::round_prefer_ceil;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    float answers[96] = {
+         0.f,  0.f,  1.f,  1.f,
+         0.f,  0.f,  1.f,  1.f,
+         2.f,  2.f,  3.f,  3.f,
+         2.f,  2.f,  3.f,  3.f,
+         4.f,  4.f,  5.f,  5.f,
+         4.f,  4.f,  5.f,  5.f,
+
+         6.f,  6.f,  7.f,  7.f,
+         6.f,  6.f,  7.f,  7.f,
+         8.f,  8.f,  9.f,  9.f,
+         8.f,  8.f,  9.f,  9.f,
+        10.f, 10.f, 11.f, 11.f,
+        10.f, 10.f, 11.f, 11.f,
+
+        12.f, 12.f, 13.f, 13.f,
+        12.f, 12.f, 13.f, 13.f,
+        14.f, 14.f, 15.f, 15.f,
+        14.f, 14.f, 15.f, 15.f,
+        16.f, 16.f, 17.f, 17.f,
+        16.f, 16.f, 17.f, 17.f,
+
+        18.f, 18.f, 19.f, 19.f,
+        18.f, 18.f, 19.f, 19.f,
+        20.f, 20.f, 21.f, 21.f,
+        20.f, 20.f, 21.f, 21.f,
+        22.f, 22.f, 23.f, 23.f,
+        22.f, 22.f, 23.f, 23.f,
+    };
+
+    for (int i = 0; i < 2; ++i) { // B
+        for (int j = 0; j < 2; ++j) { // F
+            for (int k = 0; k < 4; ++k) { // Y
+                for (int l = 0; l < 6; ++l) { // X
+                    auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+                    EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+                }
+            }
+        }
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+    nearest_mode nm = nearest_mode::floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    float answers[96] = {
+         0.f,  0.f,  0.f,  1.f,
+         0.f,  0.f,  0.f,  1.f,
+         0.f,  0.f,  0.f,  1.f,
+         2.f,  2.f,  2.f,  3.f,
+         2.f,  2.f,  2.f,  3.f,
+         4.f,  4.f,  4.f,  5.f,
+
+         6.f,  6.f,  6.f,  7.f,
+         6.f,  6.f,  6.f,  7.f,
+         6.f,  6.f,  6.f,  7.f,
+         8.f,  8.f,  8.f,  9.f,
+         8.f,  8.f,  8.f,  9.f,
+        10.f, 10.f, 10.f, 11.f,
+
+        12.f, 12.f, 12.f, 13.f,
+        12.f, 12.f, 12.f, 13.f,
+        12.f, 12.f, 12.f, 13.f,
+        14.f, 14.f, 14.f, 15.f,
+        14.f, 14.f, 14.f, 15.f,
+        16.f, 16.f, 16.f, 17.f,
+
+        18.f, 18.f, 18.f, 19.f,
+        18.f, 18.f, 18.f, 19.f,
+        18.f, 18.f, 18.f, 19.f,
+        20.f, 20.f, 20.f, 21.f,
+        20.f, 20.f, 20.f, 21.f,
+        22.f, 22.f, 22.f, 23.f,
+    };
+
+    for (int i = 0; i < 2; ++i) { // B
+        for (int j = 0; j < 2; ++j) { // F
+            for (int k = 0; k < 4; ++k) { // Y
+                for (int l = 0; l < 6; ++l) { // X
+                    auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+                    EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+                }
+            }
+        }
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+    nearest_mode nm = nearest_mode::simple;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    float answers[96] = {
+         0.f,  0.f,  0.f,  1.f,
+         0.f,  0.f,  0.f,  1.f,
+         0.f,  0.f,  0.f,  1.f,
+         2.f,  2.f,  2.f,  3.f,
+         2.f,  2.f,  2.f,  3.f,
+         4.f,  4.f,  4.f,  5.f,
+
+         6.f,  6.f,  6.f,  7.f,
+         6.f,  6.f,  6.f,  7.f,
+         6.f,  6.f,  6.f,  7.f,
+         8.f,  8.f,  8.f,  9.f,
+         8.f,  8.f,  8.f,  9.f,
+        10.f, 10.f, 10.f, 11.f,
+
+        12.f, 12.f, 12.f, 13.f,
+        12.f, 12.f, 12.f, 13.f,
+        12.f, 12.f, 12.f, 13.f,
+        14.f, 14.f, 14.f, 15.f,
+        14.f, 14.f, 14.f, 15.f,
+        16.f, 16.f, 16.f, 17.f,
+
+        18.f, 18.f, 18.f, 19.f,
+        18.f, 18.f, 18.f, 19.f,
+        18.f, 18.f, 18.f, 19.f,
+        20.f, 20.f, 20.f, 21.f,
+        20.f, 20.f, 20.f, 21.f,
+        22.f, 22.f, 22.f, 23.f,
+    };
+
+    for (int i = 0; i < 2; ++i) { // B
+        for (int j = 0; j < 2; ++j) { // F
+            for (int k = 0; k < 4; ++k) { // Y
+                for (int l = 0; l < 6; ++l) { // X
+                    auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+                    EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+                }
+            }
+        }
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 2;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+    nearest_mode nm = nearest_mode::round_prefer_floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0.f,  0.f,  1.f,
+         4.f,  4.f,  5.f,
+
+         6.f,  6.f,  7.f,
+        10.f, 10.f, 11.f,
+
+        12.f, 12.f, 13.f,
+        16.f, 16.f, 17.f,
+
+        18.f, 18.f, 19.f,
+        22.f, 22.f, 23.f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 1;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::pytorch_half_pixel;
+    nearest_mode nm = nearest_mode::round_prefer_floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0.f,  0.f,  1.f,
+         6.f,  6.f,  7.f,
+
+        12.f, 12.f, 13.f,
+        18.f, 18.f, 19.f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 2;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::asymmetric;
+    nearest_mode nm = nearest_mode::round_prefer_floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0.f,  1.f,  1.f,
+         2.f,  3.f,  3.f,
+
+         6.f,  7.f,  7.f,
+         8.f,  9.f,  9.f,
+
+        12.f, 13.f, 13.f,
+        14.f, 15.f, 15.f,
+
+        18.f, 19.f, 19.f,
+        20.f, 21.f, 21.f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 2;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::tf_half_pixel_for_nn;
+    nearest_mode nm = nearest_mode::round_prefer_floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         2.f,  3.f,  3.f,
+         4.f,  5.f,  5.f,
+
+         8.f,  9.f,  9.f,
+        10.f, 11.f, 11.f,
+
+        14.f, 15.f, 15.f,
+        16.f, 17.f, 17.f,
+
+        20.f, 21.f, 21.f,
+        22.f, 23.f, 23.f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 2;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::nearest;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::align_corners;
+    nearest_mode nm = nearest_mode::round_prefer_floor;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0.f,  0.f,  1.f,
+         4.f,  4.f,  5.f,
+
+         6.f,  6.f,  7.f,
+        10.f, 10.f, 11.f,
+
+        12.f, 12.f, 13.f,
+        16.f, 16.f, 17.f,
+
+        18.f, 18.f, 19.f,
+        22.f, 22.f, 23.f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_cubic) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 2;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::cubic;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0.29600694f,  0.8828125f,  1.46961806f,
+         3.53038194f,  4.1171875f,  4.70399306f,
+
+         6.29600694f,  6.8828125f,  7.46961806f,
+         9.53038194f, 10.1171875f, 10.70399306f,
+
+        12.29600694f, 12.8828125f, 13.46961806f,
+        15.53038194f, 16.1171875f, 16.70399306f,
+
+        18.29600694f, 18.8828125f, 19.46961806f,
+        21.53038194f, 22.1171875f, 22.70399306f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 1;
+    int f = 1;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::cubic;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+    set_values(input, {
+        5.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+          5.34722222f,  3.f, 0.65277778f,
+          1.91319444f, 2.5f, 3.08680556f,
+          3.91319444f, 4.5f, 5.08680556f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_linear) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 2;
+    int f = 2;
+    int y = 3;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 2;
+    x = 3;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::caffe_bilinear;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+    set_values(input, {
+        0.f, 1.f, 2.f,
+        3.f, 4.f, 5.f,
+        6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f,
+        15.f, 16.f, 17.f,
+        18.f, 19.f, 20.f,
+        21.f, 22.f, 23.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         0.5f,  1.f,  1.5f,
+         3.5f,  4.f,  4.5f,
+
+         6.5f,  7.f,  7.5f,
+         9.5f, 10.f, 10.5f,
+
+        12.5f, 13.f, 13.5f,
+        15.5f, 16.f, 16.5f,
+
+        18.5f, 19.f, 19.5f,
+        21.5f, 22.f, 22.5f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_linear_onnx) {
+    //  Input  : 2x2x3x2
+    //  Output : 2x2x6x4
+    //  Sample Type: Nearest
+
+    const auto& engine = get_test_engine();
+
+    int b = 1;
+    int f = 1;
+    int y = 2;
+    int x = 2;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 4;
+    x = 4;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::bilinear;
+    coordinate_transformation_mode ctm = coordinate_transformation_mode::asymmetric;
+    resample::AxesAndScales axesAndScales;
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm));
+
+    set_values(input, {
+        1.f, 2.f,
+        3.f, 4.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+             1.f, 1.33333f, 1.66667f,      2.f,
+        1.66667f,      2.f, 2.33333f, 2.66667f,
+        2.33333f, 2.66667f,      3.f, 3.33333f,
+             3.f, 3.33333f, 3.66667f,      4.f,
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}
+
+TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) {
+    //  Input  : 1x1x2x4
+    //  Output : 1x1x1x2
+    //  Sample Type: Linear
+
+    const auto& engine = get_test_engine();
+
+    int b = 1;
+    int f = 1;
+    int y = 2;
+    int x = 4;
+    tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+    y = 1;
+    x = 2;
+    auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    resample_type mode = resample_type::caffe_bilinear;
+    resample::AxesAndScales axesAndScales = {
+        {cldnn::resample::resample_axis::along_y, 0.6f},
+        {cldnn::resample::resample_axis::along_x, 0.6f},
+    };
+    shape_calculation_mode shapeCalcMode = shape_calculation_mode::scales;
+    topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+    set_values(input, {
+        1.f, 2.f, 3.f, 4.f,
+        5.f, 6.f, 7.f, 8.f,
+    });
+
+    cldnn::network net {engine, topology };
+
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+
+    auto output = outputs.at("interpolate").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> answers = {
+         2.6666665f,  4.3333331f
+    };
+
+    ASSERT_EQ(answers.size(), output_ptr.size());
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+    }
+}