{ "Copy" , Copy },
{ "Resample" , Resample },
{ "Interp" , Interp },
+ { "Interpolate" , Interpolate },
{ "RegionYolo" , RegionYolo },
{ "ReorgYolo" , ReorgYolo },
{ "Const" , ConstantBlob },
break;
case Interp: CreateInterpPrimitive(topology, layer);
break;
+ case Interpolate: CreateInterpolatePrimitive(topology, layer);
+ break;
case ArgMax:
case ArgMin:
CreateArgMaxMinPrimitive(topology, layer, LayerTypeFromStr(layer->type));
auto outDims = layer->outData[0]->getTensorDesc().getDims();
auto outTensor = CldnnTensorFromIEDims(outDims);
+ std::vector<int> pads_begin(outDims.size(), 0);
+ std::vector<int> pads_end(outDims.size(), 0);
int pad_begin = interpLayer->GetParamAsInt("pad_beg_", 0);
int pad_end = interpLayer->GetParamAsInt("pad_end_", 0);
+ for (size_t i = 2; i < pads_begin.size(); ++i) {
+ pads_begin[i] = pad_begin;
+ pads_end[i] = pad_end;
+ }
int align_corners = interpLayer->GetParamAsInt("align_corners", 1);
std::string resampleLayerName = layer_type_name_ID(layer);
resampleLayerName,
inputPrimitives[0],
outTensor,
- pad_begin,
- pad_end,
+ pads_begin,
+ pads_end,
align_corners,
cldnn::resample_type::bilinear);
AddPrimitiveToProfiler(resampleLayerName, layer);
}
+static cldnn::coordinate_transformation_mode CoordinateTransformationModeFromString(const std::string &str) {
+ static const caseless_map<std::string, cldnn::coordinate_transformation_mode> CoordTransformationMode = {
+ { "half_pixel" , cldnn::coordinate_transformation_mode::half_pixel },
+ { "pytorch_half_pixel" , cldnn::coordinate_transformation_mode::pytorch_half_pixel },
+ { "asymmetric" , cldnn::coordinate_transformation_mode::asymmetric },
+ { "tf_half_pixel_for_nn" , cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn },
+ { "align_corners" , cldnn::coordinate_transformation_mode::align_corners },
+ };
+ auto it = CoordTransformationMode.find(str);
+ if (it != CoordTransformationMode.end())
+ return it->second;
+ else
+ THROW_CLDNN_EXCEPTION("Unknown coordinate transformation mode: " << str);
+}
+
+static cldnn::nearest_mode NearestModeFromString(const std::string &str) {
+ static const caseless_map<std::string, cldnn::nearest_mode> NearestMode = {
+ { "round_prefer_floor" , cldnn::nearest_mode::round_prefer_floor },
+ { "round_prefer_ceil" , cldnn::nearest_mode::round_prefer_ceil },
+ { "floor" , cldnn::nearest_mode::floor },
+ { "ceil" , cldnn::nearest_mode::ceil },
+ { "simple" , cldnn::nearest_mode::simple },
+ };
+ auto it = NearestMode.find(str);
+ if (it != NearestMode.end())
+ return it->second;
+ else
+ THROW_CLDNN_EXCEPTION("Unknown nearest mode: " << str);
+}
+
+static cldnn::shape_calculation_mode ShapeCalculationModeFromString(const std::string &str) {
+ static const caseless_map<std::string, cldnn::shape_calculation_mode> shapeCalcMode = {
+ { "sizes" , cldnn::shape_calculation_mode::sizes },
+ { "scales" , cldnn::shape_calculation_mode::scales },
+ };
+ auto it = shapeCalcMode.find(str);
+ if (it != shapeCalcMode.end())
+ return it->second;
+ else
+ THROW_CLDNN_EXCEPTION("Unknown shape calculation mode: " << str);
+}
+
+inline cldnn::resample::resample_axis InterpolateAxisFromIEAxis(int axis, unsigned sz) {
+ if (axis < 0)
+ axis += sz;
+ if (axis < 0 || axis >= sz)
+ THROW_CLDNN_EXCEPTION("Interpolate axis is not correspond to number of dimensions");
+
+ // Difference in dimension ordering between IE and clDNN,
+ // reverse spatial dimensions after batch and feature.
+ unsigned cldnn_axis = axis;
+ if (axis >= 2) {
+ auto spatial_axis = axis - 2;
+ // Default and minimum number of dimensions is 4
+ auto spatial_size = std::max(sz, 4u) - 2;
+ cldnn_axis = spatial_size - spatial_axis - 1 + 2;
+ }
+
+ switch (cldnn_axis) {
+ case 0:
+ return cldnn::resample::resample_axis::along_b;
+ case 1:
+ return cldnn::resample::resample_axis::along_f;
+ case 2:
+ return cldnn::resample::resample_axis::along_x;
+ case 3:
+ return cldnn::resample::resample_axis::along_y;
+ case 4:
+ return cldnn::resample::resample_axis::along_z;
+ case 5:
+ return cldnn::resample::resample_axis::along_w;
+ default:
+ break;
+ }
+ THROW_CLDNN_EXCEPTION("Unsupported Interpolate axis: " << axis);
+}
+
+void Program::CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+ ValidateLayer(layer, {3, 4});
+ auto inputPrimitives = GetPrevLayersPrimitives(layer);
+ auto interpolateLayer = as<InferenceEngine::GenericLayer*> (layer);
+
+ std::shared_ptr<Data> insData0 = layer->insData[0].lock();
+ IE_ASSERT(insData0 != nullptr);
+ auto insData0dims = insData0->getTensorDesc().getDims();
+ auto outDims = layer->outData[0]->getTensorDesc().getDims();
+ auto outTensor = CldnnTensorFromIEDims(outDims);
+
+ auto pads_begin = interpolateLayer->GetParamAsInts("pads_begin", {});
+ auto pads_end = interpolateLayer->GetParamAsInts("pads_end", {});
+ for (size_t i = pads_begin.size(); i < outDims.size() || i < 4; ++i)
+ pads_begin.push_back(0);
+ for (size_t i = pads_end.size(); i < outDims.size() || i < 4; ++i)
+ pads_end.push_back(0);
+ std::string mode = interpolateLayer->GetParamAsString("mode");
+ std::string shape_calc_mode = interpolateLayer->GetParamAsString("shape_calculation_mode");
+ std::string coordinate_trans_mode = interpolateLayer->GetParamAsString("coordinate_transformation_mode", "half_pixel");
+ std::string nearest_mode = interpolateLayer->GetParamAsString("nearest_mode", "round_prefer_floor");
+ int antialias = interpolateLayer->GetParamAsBool("antialias", false);
+ float cube_coeff = interpolateLayer->GetParamAsFloat("cube_coeff", -0.75f);
+
+ std::string resampleLayerName = layer_type_name_ID(layer);
+ auto cldnnSampleType = ResampleTypeFromString(mode);
+ auto shapeCalcMode = ShapeCalculationModeFromString(shape_calc_mode);
+ auto coordTransMode = CoordinateTransformationModeFromString(coordinate_trans_mode);
+ auto nearestMode = NearestModeFromString(nearest_mode);
+
+ std::vector<float> scales;
+ auto scalesInput = layer->insData[2].lock();
+ auto scalesInputCreator = getCreatorLayer(scalesInput).lock();
+ if (scalesInputCreator->blobs.size() == 1) {
+ auto constantBlob = scalesInputCreator->blobs.begin()->second;
+ auto axesPrecision = constantBlob->getTensorDesc().getPrecision();
+ if (axesPrecision == InferenceEngine::Precision::FP32) {
+ auto data = constantBlob->buffer().as<float*>();
+ for (size_t i = 0; i < constantBlob->size(); ++i)
+ scales.push_back(data[i]);
+ } else {
+ THROW_IE_EXCEPTION << layer->name << " Incorrect scales input precision";
+ }
+ }
+
+ std::vector<cldnn::resample::resample_axis> axes;
+ if (inputPrimitives.size() == 4) {
+ auto axesInput = layer->insData[3].lock();
+ auto axesInputCreator = getCreatorLayer(axesInput).lock();
+ if (axesInputCreator->blobs.size() == 1) {
+ auto constantBlob = axesInputCreator->blobs.begin()->second;
+ auto axesPrecision = constantBlob->getTensorDesc().getPrecision();
+ if (axesPrecision == InferenceEngine::Precision::I32) {
+ auto data = constantBlob->buffer().as<int32_t*>();
+ for (size_t i = 0; i < constantBlob->size(); ++i)
+ axes.push_back(InterpolateAxisFromIEAxis(data[i], insData0dims.size()));
+ } else if (axesPrecision == InferenceEngine::Precision::I64) {
+ auto data = constantBlob->buffer().as<int64_t*>();
+ for (size_t i = 0; i < constantBlob->size(); ++i)
+ axes.push_back(InterpolateAxisFromIEAxis(static_cast<int32_t>(data[i]), insData0dims.size()));
+ } else {
+ THROW_IE_EXCEPTION << layer->name
+ << " Incorrect axes input precision";
+ }
+ }
+ } else {
+ for (int i = 0; i < insData0dims.size(); ++i) {
+ axes.push_back(InterpolateAxisFromIEAxis(i, insData0dims.size()));
+ }
+ }
+
+ if (axes.size() != scales.size())
+ THROW_IE_EXCEPTION << layer->name << " Incorrect axes and scales should be the same size";
+
+ cldnn::resample::AxesAndScales axesAndScales;
+ for (size_t i = 0; i < axes.size(); ++i) {
+ axesAndScales[axes[i]] = scales[i];
+ }
+
+ if (cldnnSampleType == cldnn::resample_type::linear_onnx) {
+ if (insData0dims.size() != 2 && insData0dims.size() != 4)
+ THROW_CLDNN_EXCEPTION("mode 'linear_onnx' supports only 2D or 4D tensors");
+ if (axes.size() != 2 && insData0dims.size() != axes.size())
+ THROW_CLDNN_EXCEPTION("mode 'linear_onnx' supports only axes with size 2 or equal to input rank");
+ bool correctAxes =
+ ((axes[0] == cldnn::resample::resample_axis::along_b) &&
+ (axes[1] == cldnn::resample::resample_axis::along_f)) ||
+ ((axes[0] == cldnn::resample::resample_axis::along_y) &&
+ (axes[1] == cldnn::resample::resample_axis::along_x));
+ if (axes.size() == 4 && insData0dims.size() == 4) {
+ correctAxes = axes[0] == cldnn::resample::resample_axis::along_b &&
+ axes[1] == cldnn::resample::resample_axis::along_f &&
+ axes[2] == cldnn::resample::resample_axis::along_y &&
+ axes[3] == cldnn::resample::resample_axis::along_x;
+ }
+ if (!correctAxes)
+ THROW_CLDNN_EXCEPTION(
+ "mode 'linear_onnx' supports only case when axes = {2, 3} or "
+ "axes = {0, 1} or axes = {0, 1, 2, 3}");
+ }
+
+ auto resamplePrim = cldnn::resample(
+ resampleLayerName,
+ inputPrimitives[0],
+ outTensor,
+ axesAndScales,
+ pads_begin,
+ pads_end,
+ antialias,
+ cube_coeff,
+ cldnnSampleType,
+ shapeCalcMode,
+ coordTransMode,
+ nearestMode);
+
+ topology.add(resamplePrim);
+ AddPrimitiveToProfiler(resampleLayerName, layer);
+}
+
void Program::CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
ValidateLayer(layer, 1);
auto inputPrimitives = GetPrevLayersPrimitives(layer);
{ "caffe.ResampleParameter.LINEAR" , cldnn::resample_type::caffe_bilinear },
{ "caffe.ResampleParameter.NEAREST" , cldnn::resample_type::nearest },
{ "Interp" , cldnn::resample_type::bilinear },
+ { "linear" , cldnn::resample_type::caffe_bilinear },
+ { "linear_onnx" , cldnn::resample_type::linear_onnx },
+ { "cubic" , cldnn::resample_type::cubic },
+ { "nearest" , cldnn::resample_type::nearest },
};
auto it = UpsamplingTypeNameToType.find(str);
if (it != UpsamplingTypeNameToType.end())
Copy,
Resample,
Interp,
+ Interpolate,
RegionYolo,
ReorgYolo,
ConstantBlob,
void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+ void CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners,
};
+const std::vector<ngraph::op::v4::Interpolate::ShapeCalcMode> shapeCalculationMode = {
+ ngraph::op::v4::Interpolate::ShapeCalcMode::sizes,
+ ngraph::op::v4::Interpolate::ShapeCalcMode::scales,
+};
+
const std::vector<ngraph::op::v4::Interpolate::NearestMode> nearestModes = {
ngraph::op::v4::Interpolate::NearestMode::simple,
ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
};
const std::vector<std::vector<size_t>> pads = {
- // {0, 0, 1, 1},
+ {0, 0, 1, 1},
{0, 0, 0, 0},
};
-0.75f,
};
+const std::vector<std::vector<int64_t>> defaultAxes = {
+ {2, 3}
+};
+
+const std::vector<std::vector<float>> defaultScales = {
+ {1.33333f, 1.33333f}
+};
+
const auto interpolateCasesWithoutNearest = ::testing::Combine(
::testing::ValuesIn(modesWithoutNearest),
+ ::testing::ValuesIn(shapeCalculationMode),
::testing::ValuesIn(coordinateTransformModes),
::testing::ValuesIn(defaultNearestMode),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads),
::testing::ValuesIn(pads),
- ::testing::ValuesIn(cubeCoefs));
+ ::testing::ValuesIn(cubeCoefs),
+ ::testing::ValuesIn(defaultAxes),
+ ::testing::ValuesIn(defaultScales));
const auto interpolateCases = ::testing::Combine(
::testing::ValuesIn(nearestMode),
+ ::testing::ValuesIn(shapeCalculationMode),
::testing::ValuesIn(coordinateTransformModes),
::testing::ValuesIn(nearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads),
::testing::ValuesIn(pads),
- ::testing::ValuesIn(cubeCoefs));
+ ::testing::ValuesIn(cubeCoefs),
+ ::testing::ValuesIn(defaultAxes),
+ ::testing::ValuesIn(defaultScales));
INSTANTIATE_TEST_CASE_P(Interpolate_Basic, InterpolateLayerTest, ::testing::Combine(
interpolateCasesWithoutNearest,
std::tie(interpolateParams, netPrecision, inputShape, targetShape, targetDevice) = basicParamsSet;
ngraph::op::v4::Interpolate::InterpolateMode mode;
+ ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode;
ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode;
ngraph::op::v4::Interpolate::NearestMode nearestMode;
bool antialias;
std::vector<size_t> padBegin, padEnd;
double cubeCoef;
- std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams;
+ std::vector<int64_t> axes;
+ std::vector<float> scales;
+ std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams;
using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode;
- ShapeCalcMode shape_calc_mode = ShapeCalcMode::sizes;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape);
- std::vector<float> scales(targetShape.size(), 1.0f);
auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales);
auto scalesInput = std::make_shared<ngraph::opset3::Constant>(scales_const);
auto secondaryInput = std::make_shared<ngraph::opset3::Constant>(constant);
- ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shape_calc_mode, padBegin,
+ auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes);
+ auto axesInput = std::make_shared<ngraph::opset3::Constant>(axesConst);
+ ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin,
padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef};
auto interpolate = std::make_shared<ngraph::op::v4::Interpolate>(params[0],
secondaryInput,
scalesInput,
+ axesInput,
interpolateAttributes);
interpolate->get_rt_info() = CPUTestsBase::setCPUInfo(inFmts, outFmts, priority);
const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(interpolate)};
ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners,
};
+const std::vector<ngraph::op::v4::Interpolate::ShapeCalcMode> shapeCalculationMode = {
+ ngraph::op::v4::Interpolate::ShapeCalcMode::sizes,
+ ngraph::op::v4::Interpolate::ShapeCalcMode::scales,
+};
+
const std::vector<ngraph::op::v4::Interpolate::NearestMode> nearestModes = {
ngraph::op::v4::Interpolate::NearestMode::simple,
ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
-0.75f,
};
+const std::vector<std::vector<int64_t>> defaultAxes = {
+ {2, 3}
+};
+
+const std::vector<std::vector<float>> defaultScales = {
+ {1.25f, 1.5f}
+};
+
const auto interpolateCasesNN = ::testing::Combine(
::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest),
+ ::testing::ValuesIn(shapeCalculationMode),
::testing::ValuesIn(coordinateTransformModes),
::testing::ValuesIn(nearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads),
::testing::ValuesIn(pads),
- ::testing::ValuesIn(cubeCoefs));
+ ::testing::ValuesIn(cubeCoefs),
+ ::testing::ValuesIn(defaultAxes),
+ ::testing::ValuesIn(defaultScales));
const auto interpolateCasesLinearOnnx = ::testing::Combine(
::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx),
+ ::testing::ValuesIn(shapeCalculationMode),
::testing::ValuesIn(coordinateTransformModes),
::testing::ValuesIn(defNearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads),
::testing::ValuesIn(pads),
- ::testing::ValuesIn(cubeCoefs));
+ ::testing::ValuesIn(cubeCoefs),
+ ::testing::ValuesIn(defaultAxes),
+ ::testing::ValuesIn(defaultScales));
INSTANTIATE_TEST_CASE_P(InterpolateNN_Layout_Test, InterpolateLayerCPUTest,
::testing::Combine(
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/interpolate.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> prc = {
+ InferenceEngine::Precision::FP16,
+ InferenceEngine::Precision::FP32,
+};
+
+const std::vector<std::vector<size_t>> inShapes = {
+ {1, 1, 23, 23},
+};
+
+const std::vector<std::vector<size_t>> targetShapes = {
+ {1, 1, 46, 46},
+};
+
+const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> modesWithoutNearest = {
+ ngraph::op::v4::Interpolate::InterpolateMode::linear,
+ ngraph::op::v4::Interpolate::InterpolateMode::cubic,
+ ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> nearestMode = {
+ ngraph::op::v4::Interpolate::InterpolateMode::nearest,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::CoordinateTransformMode> coordinateTransformModes = {
+ ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn,
+ ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel,
+ ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel,
+ ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric,
+ ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::ShapeCalcMode> shapeCalculationMode = {
+ ngraph::op::v4::Interpolate::ShapeCalcMode::sizes,
+ ngraph::op::v4::Interpolate::ShapeCalcMode::scales,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::NearestMode> nearestModes = {
+ ngraph::op::v4::Interpolate::NearestMode::simple,
+ ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
+ ngraph::op::v4::Interpolate::NearestMode::floor,
+ ngraph::op::v4::Interpolate::NearestMode::ceil,
+ ngraph::op::v4::Interpolate::NearestMode::round_prefer_ceil,
+};
+
+const std::vector<ngraph::op::v4::Interpolate::NearestMode> defaultNearestMode = {
+ ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor,
+};
+
+const std::vector<std::vector<size_t>> pads = {
+ {0, 0, 1, 1},
+ {0, 0, 0, 0},
+};
+
+const std::vector<bool> antialias = {
+// Not enabled in Inference Engine
+// true,
+ false,
+};
+
+const std::vector<double> cubeCoefs = {
+ -0.75f,
+};
+
+const std::vector<std::vector<int64_t>> defaultAxes = {
+ {0, 1, 2, 3}
+};
+
+const std::vector<std::vector<float>> defaultScales = {
+ {1.f, 1.f, 2.f, 2.f}
+};
+
+const auto interpolateCasesWithoutNearest = ::testing::Combine(
+ ::testing::ValuesIn(modesWithoutNearest),
+ ::testing::ValuesIn(shapeCalculationMode),
+ ::testing::ValuesIn(coordinateTransformModes),
+ ::testing::ValuesIn(defaultNearestMode),
+ ::testing::ValuesIn(antialias),
+ ::testing::ValuesIn(pads),
+ ::testing::ValuesIn(pads),
+ ::testing::ValuesIn(cubeCoefs),
+ ::testing::ValuesIn(defaultAxes),
+ ::testing::ValuesIn(defaultScales));
+
+const auto interpolateCasesNearesMode = ::testing::Combine(
+ ::testing::ValuesIn(nearestMode),
+ ::testing::ValuesIn(shapeCalculationMode),
+ ::testing::ValuesIn(coordinateTransformModes),
+ ::testing::ValuesIn(nearestModes),
+ ::testing::ValuesIn(antialias),
+ ::testing::ValuesIn(pads),
+ ::testing::ValuesIn(pads),
+ ::testing::ValuesIn(cubeCoefs),
+ ::testing::ValuesIn(defaultAxes),
+ ::testing::ValuesIn(defaultScales));
+
+INSTANTIATE_TEST_CASE_P(Interpolate_Basic, InterpolateLayerTest, ::testing::Combine(
+ interpolateCasesWithoutNearest,
+ ::testing::ValuesIn(prc),
+ ::testing::ValuesIn(inShapes),
+ ::testing::ValuesIn(targetShapes),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ InterpolateLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(Interpolate_Nearest, InterpolateLayerTest, ::testing::Combine(
+ interpolateCasesNearesMode,
+ ::testing::ValuesIn(prc),
+ ::testing::ValuesIn(inShapes),
+ ::testing::ValuesIn(targetShapes),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ InterpolateLayerTest::getTestCaseName);
+
+} // namespace
R"(.*(ComparisonLayerTest).*)",
// TODO: Issue: 39014
R"(.*CoreThreadingTestsWithIterations.*smoke_LoadNetwork.*)",
+ // TODO: Issue: 39612
+ R"(.*Interpolate.*cubic.*tf_half_pixel_for_nn.*FP16.*)",
};
-}
\ No newline at end of file
+}
typedef std::tuple<
ngraph::op::v4::Interpolate::InterpolateMode, // InterpolateMode
+ ngraph::op::v4::Interpolate::ShapeCalcMode, // ShapeCalculationMode
ngraph::op::v4::Interpolate::CoordinateTransformMode, // CoordinateTransformMode
ngraph::op::v4::Interpolate::NearestMode, // NearestMode
bool, // AntiAlias
std::vector<size_t>, // PadBegin
std::vector<size_t>, // PadEnd
- double // Cube coef
+ double, // Cube coef
+ std::vector<int64_t>, // Axes
+ std::vector<float> // Scales
> InterpolateSpecificParams;
typedef std::tuple<
std::string targetDevice;
std::tie(interpolateParams, netPrecision, inputShapes, targetShapes, targetDevice) = obj.param;
std::vector<size_t> padBegin, padEnd;
+ std::vector<int64_t> axes;
+ std::vector<float> scales;
bool antialias;
ngraph::op::v4::Interpolate::InterpolateMode mode;
+ ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode;
ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode;
ngraph::op::v4::Interpolate::NearestMode nearestMode;
double cubeCoef;
- std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams;
+ std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "TS=" << CommonTestUtils::vec2str(targetShapes) << "_";
result << "InterpolateMode=" << mode << "_";
+ result << "ShapeCalcMode=" << shapeCalcMode << "_";
result << "CoordinateTransformMode=" << coordinateTransformMode << "_";
result << "NearestMode=" << nearestMode << "_";
result << "CubeCoef=" << cubeCoef << "_";
result << "Antialias=" << antialias << "_";
result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_";
result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_";
+ result << "Axes=" << CommonTestUtils::vec2str(axes) << "_";
+ result << "Scales=" << CommonTestUtils::vec2str(scales) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice;
return result.str();
std::tie(interpolateParams, netPrecision, inputShape, targetShape, targetDevice) = this->GetParam();
std::vector<size_t> padBegin, padEnd;
+ std::vector<int64_t> axes;
+ std::vector<float> scales;
bool antialias;
ngraph::op::v4::Interpolate::InterpolateMode mode;
+ ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode;
ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode;
ngraph::op::v4::Interpolate::NearestMode nearestMode;
- using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode;
- ShapeCalcMode shape_calc_node = ShapeCalcMode::sizes;
double cubeCoef;
- std:tie(mode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef) = interpolateParams;
+ std:tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
- auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape);
+ auto sizesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape);
+ auto sizesInput = std::make_shared<ngraph::opset3::Constant>(sizesConst);
- std::vector<float> scales(targetShape.size(), 1.0f);
auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales);
-
auto scalesInput = std::make_shared<ngraph::opset3::Constant>(scales_const);
- auto secondaryInput = std::make_shared<ngraph::opset3::Constant>(constant);
+ auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes);
+ auto axesInput = std::make_shared<ngraph::opset3::Constant>(axesConst);
- ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shape_calc_node, padBegin,
+ ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin,
padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef};
auto interpolate = std::make_shared<ngraph::op::v4::Interpolate>(params[0],
- secondaryInput,
+ sizesInput,
scalesInput,
+ axesInput,
interpolateAttributes);
const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(interpolate)};
function = std::make_shared<ngraph::Function>(results, params, "interpolate");
std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::NearestMode type);
+std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type);
+
} // namespace helpers
} // namespace ngraph
return os;
}
+std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type) {
+ switch (type) {
+ case ngraph::op::v4::Interpolate::ShapeCalcMode::scales:
+ os << "scales";
+ break;
+ case ngraph::op::v4::Interpolate::ShapeCalcMode::sizes:
+ os << "sizes";
+ break;
+ default:
+ throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
+ }
+ return os;
+}
+
} // namespace helpers
} // namespace ngraph
#pragma once
#include "primitive.hpp"
+#include <map>
+
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @brief bilinear interpolation.
bilinear,
/// @brief caffe bilinear interpolation.
- caffe_bilinear
+ caffe_bilinear,
+ /// @brief cubic interpolation.
+ cubic,
+ /// @brief linear onnx interpolation.
+ linear_onnx
+};
+
+/// @brief Specifies which of inputs target_spatial_shape or scales is used to calculate an output shape
+enum class shape_calculation_mode : int32_t {
+ /// @brief output shape calculated based on sizes of input and output tensors
+ sizes,
+ /// @brief output shape calculated based on scales coefficients
+ scales
+};
+
+/// @brief Coordinate transformation mode for the @ref resample layer.
+enum class coordinate_transformation_mode : int32_t {
+ /// @brief the coordinate in the original tensor axis `x` is calculated as `((x_resized + 0.5) / scale[x]) - 0.5`.
+ half_pixel,
+ /// @brief the coordinate in the original tensor axis `x` is calculated by `(x_resized + 0.5) / scale[x] - 0.5 if output_shape[x] > 1 else 0.0`.
+ pytorch_half_pixel,
+ /// @brief the coordinate in the original tensor axis `x` is calculated according to the formula `x_resized / scale[x]`.
+ asymmetric,
+ /// @brief the coordinate in the original tensor axis `x` is `(x_resized + 0.5) / scale[x]`.
+ tf_half_pixel_for_nn,
+ /// @brief the coordinate in the original tensor axis `x` is calculated as `0 if output_shape[x] == 1 else x_resized * (input_shape[x] - 1) / (output_shape[x] - 1)`.
+ align_corners
+};
+
+/// @brief Nearest mode for the @ref resample layer.
+enum class nearest_mode : int32_t {
+ /// @brief this mode is known as round half down.
+ round_prefer_floor,
+ /// @brief it is round half up mode.
+ round_prefer_ceil,
+ /// @brief this mode computes the largest integer value not greater than rounded value.
+ floor,
+ /// @brief this mode computes the smallest integer value not less than rounded value
+ ceil,
+ /// @brief this mode behaves as `ceil` mode when `Interpolate` is downsample, and as dropping the fractional part otherwise.
+ simple
};
/// @brief Performs nearest neighbor/bilinear resample
struct resample : public primitive_base<resample> {
CLDNN_DECLARE_PRIMITIVE(resample)
+ enum resample_axis {
+ along_b,
+ along_f,
+ along_x,
+ along_y,
+ along_z,
+ along_w
+ };
+
+ using AxesAndScales = std::map<resample_axis, float>;
+
/// @brief Constructs Resample primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
: primitive_base(id, {input}, output_padding),
output_size(output_size),
num_filter(num_filter),
- pad_begin(0),
- pad_end(0),
align_corners(1),
operation_type(operation_type),
+ shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation),
- activation_negative_slope(activation_slp) {}
+ activation_negative_slope(activation_slp),
+ coord_trans_mode(coordinate_transformation_mode::asymmetric),
+ round_mode(nearest_mode::floor) {
+ if (operation_type == resample_type::caffe_bilinear) {
+ coord_trans_mode = coordinate_transformation_mode::half_pixel;
+ }
+ }
/// @brief Constructs Resample primitive with Interp operation.
/// @param id This primitive id.
/// @param input Input primitive id.
- /// @param pad_begin Optional begin padding for input.
- /// @param pad_end Optional end padding for input.
+ /// @param pads_begin Optional begin padding for input.
+ /// @param pads_end Optional end padding for input.
/// @param align_corners Align corner pixels of the input and output tensors.
/// @param resample_type Resample bilinear method.
/// @param with_activation Enables Relu activation.
resample(const primitive_id& id,
const primitive_id& input,
tensor output_size,
- int32_t pad_begin = 0,
- int32_t pad_end = 0,
+ std::vector<int32_t> pads_begin = {},
+ std::vector<int32_t> pads_end = {},
int32_t align_corners = 1,
resample_type operation_type = resample_type::bilinear,
bool with_activation = false,
: primitive_base(id, {input}, output_padding),
output_size(output_size),
num_filter(0),
- pad_begin(pad_begin),
- pad_end(pad_end),
+ pads_begin(pads_begin),
+ pads_end(pads_end),
align_corners(align_corners),
operation_type(operation_type),
+ shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation),
- activation_negative_slope(activation_slp) {}
+ activation_negative_slope(activation_slp),
+ coord_trans_mode(coordinate_transformation_mode::asymmetric),
+ round_mode(nearest_mode::floor) {}
+
+ /// @brief Constructs Resample primitive with Interpolate operation.
+ /// @param id This primitive id.
+ /// @param input Input primitive id.
+ /// @param pads_begin Optional begin padding for input.
+ /// @param pads_end Optional end padding for input.
+ resample(const primitive_id& id,
+ const primitive_id& input,
+ tensor output_size,
+ AxesAndScales axesAndScales,
+ std::vector<int32_t> pads_begin = {},
+ std::vector<int32_t> pads_end = {},
+ int32_t antialias = 0,
+ float cube_coeff = -0.75f,
+ resample_type mode = resample_type::caffe_bilinear,
+ shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes,
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel,
+ nearest_mode nm = nearest_mode::round_prefer_floor,
+ const padding& output_padding = padding())
+ : primitive_base(id, {input}, output_padding),
+ output_size(output_size),
+ axesAndScales(axesAndScales),
+ pads_begin(pads_begin),
+ pads_end(pads_end),
+ operation_type(mode),
+ shape_calc_mode(shape_calc_mode),
+ with_activation(false),
+ antialias(antialias),
+ cube_coeff(cube_coeff),
+ coord_trans_mode(ctm),
+ round_mode(nm) {}
/// @param scale Resample scale.
tensor output_size;
/// @param num_filter Input filter. Only used by bilinear sample_type.
uint32_t num_filter;
- /// @param pad_begin Begin padding for input.
- int32_t pad_begin;
- /// @param pad_end End padding for input.
- int32_t pad_end;
+ /// @param scales scales for spatial axes.
+ AxesAndScales axesAndScales;
+ /// @param pads_begin Begin paddings for input.
+ std::vector<int32_t> pads_begin;
+ /// @param pads_end End paddings for input.
+ std::vector<int32_t> pads_end;
/// @param align_corners corner pixels of the input and output tensors
int32_t align_corners;
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear).
resample_type operation_type;
+ /// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape.
+ shape_calculation_mode shape_calc_mode;
/// @brief Enables Relu activation.
bool with_activation;
/// @brief Relu activation slope.
float activation_negative_slope;
+ /// @param antialias is a flag that specifies whether to perform anti-aliasing.
+ int32_t antialias;
+ /// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic.
+ float cube_coeff;
+ /// @param specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor
+ coordinate_transformation_mode coord_trans_mode;
+ /// @param specifies round mode when mode == nearest and is used only when mode == nearest.
+ nearest_mode round_mode;
};
/// @}
/// @}
NEAREST_NEIGHBOR,
BILINEAR_INTERP,
CAFFE_BILINEAR_INTERP,
+ CUBIC,
+ LINEAR_ONNX,
+};
+
+enum class CoordinateTransformationMode {
+ HALF_PIXEL,
+ PYTORCH_HALF_PIXEL,
+ ASYMMETRIC,
+ TF_HALF_PIXEL_FOR_NN,
+ ALIGN_CORNERS,
+};
+
+enum class NearestMode {
+ ROUND_PREFER_FLOOR,
+ ROUND_PREFER_CEIL,
+ FLOOR,
+ CEIL,
+ SIMPLE,
+};
+
+enum class ShapeCalculationMode {
+ SIZES,
+ SCALES,
+};
+
+enum class InterpolateAxis {
+ X,
+ Y,
+ Z,
+ W,
+ FEATURE,
+ BATCH
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#include <string>
#include <algorithm>
#include <vector>
+#include <unordered_map>
+#include <iostream>
+
+namespace {
+int getAxisIndex(kernel_selector::InterpolateAxis axis) {
+ switch(axis) {
+ case kernel_selector::InterpolateAxis::BATCH:
+ return 0;
+ case kernel_selector::InterpolateAxis::FEATURE:
+ return 1;
+ case kernel_selector::InterpolateAxis::Z:
+ return 2;
+ case kernel_selector::InterpolateAxis::Y:
+ return 3;
+ case kernel_selector::InterpolateAxis::X:
+ return 4;
+ default:
+ return 0;
+ }
+}
+} // namespace
namespace kernel_selector {
if (arg.resampleType == ResampleType::NEAREST_NEIGHBOR)
global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
- else if (arg.resampleType == ResampleType::BILINEAR_INTERP)
+ else if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX)
global = {Align(out.X().v, 32), out.Y().v, out.Batch().v};
else if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP)
global = {out.X().v * out.Y().v, CeilDiv(out.Feature().v, GetFeatureBlockSize(arg)), out.Batch().v * out.Z().v};
+ else
+ global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
- if (arg.resampleType == ResampleType::BILINEAR_INTERP) {
+ if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) {
local[0] = 32;
local[1] = 1;
local[2] = 1;
const auto& input = params.inputs[0];
const auto& output = params.output;
const auto align_corners = params.align_corners;
- const auto pad_begin = params.pad_begin;
- const auto pad_end = params.pad_end;
- const auto x_size_padded = pad_begin + input.X().v + pad_end;
- const auto y_size_padded = pad_begin + input.Y().v + pad_end;
- const auto z_size_padded = pad_begin + input.Z().v + pad_end;
- const auto out_x_size_padded = pad_begin + output.X().v + pad_end;
- const auto out_y_size_padded = pad_begin + output.Y().v + pad_end;
- const auto out_z_size_padded = pad_begin + output.Z().v + pad_end;
- float x_ratio = 0;
- float y_ratio = 0;
- float z_ratio = 0;
+ auto pads_begin = params.pads_begin;
+ auto pads_end = params.pads_end;
+ if (pads_begin.size() == 4)
+ pads_begin.insert(std::next(pads_begin.begin(), 2), 0);
+ if (pads_end.size() == 4)
+ pads_end.insert(std::next(pads_end.begin(), 2), 0);
+
+ const auto b_size_padded = pads_begin[0] + input.Batch().v + pads_end[0];
+ const auto f_size_padded = pads_begin[1] + input.Feature().v + pads_end[1];
+ const auto x_size_padded = pads_begin[4] + input.X().v + pads_end[4];
+ const auto y_size_padded = pads_begin[3] + input.Y().v + pads_end[3];
+ const auto z_size_padded = pads_begin[2] + input.Z().v + pads_end[2];
+ const auto out_b_size_padded = output.Batch().v;
+ const auto out_f_size_padded = output.Feature().v;
+ const auto out_x_size_padded = output.X().v;
+ const auto out_y_size_padded = output.Y().v;
+ const auto out_z_size_padded = output.Z().v;
+ std::vector<float> scales(5);
+ std::vector<int32_t> axesUsed(5, 0);
+ bool paddingUsed = false;
+ for (size_t i = 0; i < pads_begin.size(); ++i) {
+ paddingUsed |= (pads_begin[i] != 0 || pads_end[i] != 0);
+ }
if (align_corners) {
- x_ratio = (out_x_size_padded) > 1 ? static_cast<float>(x_size_padded - 1) / static_cast<float>(out_x_size_padded - 1) : 0.0f;
- y_ratio = (out_y_size_padded) > 1 ? static_cast<float>(y_size_padded - 1) / static_cast<float>(out_y_size_padded - 1) : 0.0f;
- z_ratio = (out_z_size_padded) > 1 ? static_cast<float>(z_size_padded - 1) / static_cast<float>(out_z_size_padded - 1) : 0.0f;
+ scales[0] = (out_b_size_padded) > 1
+ ? static_cast<float>(b_size_padded - 1) / static_cast<float>(out_b_size_padded - 1)
+ : 0.0f;
+ scales[1] = (out_f_size_padded) > 1
+ ? static_cast<float>(f_size_padded - 1) / static_cast<float>(out_f_size_padded - 1)
+ : 0.0f;
+ scales[4] = (out_x_size_padded) > 1
+ ? static_cast<float>(x_size_padded - 1) / static_cast<float>(out_x_size_padded - 1)
+ : 0.0f;
+ scales[3] = (out_y_size_padded) > 1
+ ? static_cast<float>(y_size_padded - 1) / static_cast<float>(out_y_size_padded - 1)
+ : 0.0f;
+ scales[2] = (out_z_size_padded) > 1
+ ? static_cast<float>(z_size_padded - 1) / static_cast<float>(out_z_size_padded - 1)
+ : 0.0f;
} else {
- x_ratio = static_cast<float>(x_size_padded) / static_cast<float>(out_x_size_padded);
- y_ratio = static_cast<float>(y_size_padded) / static_cast<float>(out_y_size_padded);
- z_ratio = static_cast<float>(z_size_padded) / static_cast<float>(out_z_size_padded);
+ scales[0] = static_cast<float>(b_size_padded) / static_cast<float>(out_b_size_padded);
+ scales[1] = static_cast<float>(f_size_padded) / static_cast<float>(out_f_size_padded);
+ scales[4] = static_cast<float>(x_size_padded) / static_cast<float>(out_x_size_padded);
+ scales[3] = static_cast<float>(y_size_padded) / static_cast<float>(out_y_size_padded);
+ scales[2] = static_cast<float>(z_size_padded) / static_cast<float>(out_z_size_padded);
+ }
+ for (const auto& it : params.axesAndScales) {
+ int idx = getAxisIndex(it.first);
+ axesUsed[idx] = 1;
+ if (params.shapeCalculationMode == kernel_selector::ShapeCalculationMode::SCALES)
+ scales[idx] = 1.f / it.second;
+ }
+ for (size_t i = 0; i < scales.size(); ++i) {
+ if (scales[i] != 1.f)
+ axesUsed[i] = 1;
}
jit.AddConstants({
MakeJitConstant(toString(params.resampleType), ""),
- MakeJitConstant("X_RATIO", x_ratio),
- MakeJitConstant("Y_RATIO", y_ratio),
- MakeJitConstant("Z_RATIO", z_ratio),
- MakeJitConstant("X_RATIO_HALF", x_ratio / 2.0f),
- MakeJitConstant("Y_RATIO_HALF", y_ratio / 2.0f),
- MakeJitConstant("Z_RATIO_HALF", z_ratio / 2.0f),
- MakeJitConstant("PAD_BEGIN", pad_begin),
- MakeJitConstant("PAD_END", pad_end),
+ MakeJitConstant(toString(params.nearestMode), ""),
+ MakeJitConstant(toString(params.coordTransMode), ""),
+ MakeJitConstant("SCALES", scales),
+ MakeJitConstant("PADS_BEGIN", pads_begin),
+ MakeJitConstant("PADS_END", pads_end),
+ MakeJitConstant("PADDING_USED", (int)paddingUsed),
+ MakeJitConstant("AXES_USED", axesUsed),
MakeJitConstant("ALIGN_CORNERS", align_corners),
MakeJitConstant("KERNEL_W", 2),
- MakeJitConstant("ANTIALIAS", 0)
+ MakeJitConstant("ANTIALIAS", params.antialias),
+ MakeJitConstant("CUBE_COEFF", params.cube_coeff),
});
size_t feature_block_size = GetFeatureBlockSize(params);
}
}
- if (params.resampleType == ResampleType::BILINEAR_INTERP) {
+ if (params.resampleType == ResampleType::BILINEAR_INTERP || params.resampleType == ResampleType::LINEAR_ONNX) {
if (params.output.X().v % 32 != 0) {
jit.AddConstant(MakeJitConstant("LEFTOVERS", 1));
}
#include "common_kernel_base.h"
+#include <map>
+
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// resample_params
struct resample_params : public base_params {
resample_params() : base_params(KernelType::RESAMPLE) {}
- uint32_t pad_begin = 0;
- uint32_t pad_end = 0;
+ std::vector<int32_t> pads_begin = {};
+ std::vector<int32_t> pads_end = {};
uint32_t align_corners = 0;
ResampleType resampleType = ResampleType::NEAREST_NEIGHBOR;
+ CoordinateTransformationMode coordTransMode = CoordinateTransformationMode::HALF_PIXEL;
+ NearestMode nearestMode = NearestMode::ROUND_PREFER_FLOOR;
+ ShapeCalculationMode shapeCalculationMode = ShapeCalculationMode::SIZES;
+ uint32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ using AxesAndScales = std::map<InterpolateAxis, float>;
+ AxesAndScales axesAndScales;
virtual ParamsKey GetParamsKey() const {
auto k = base_params::GetParamsKey();
k.EnableReampleType(ResampleType::NEAREST_NEIGHBOR);
k.EnableReampleType(ResampleType::CAFFE_BILINEAR_INTERP);
k.EnableReampleType(ResampleType::BILINEAR_INTERP);
+ k.EnableReampleType(ResampleType::CUBIC);
return k;
}
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
#ifdef SAMPLE_TYPE_NEAREST
- const int ix = floor((x + out_x) * X_RATIO);
- const int iy = floor(y * Y_RATIO);
+ const int ix = floor((x + out_x) * SCALES[4]);
+ const int iy = floor(y * SCALES[3]);
in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iy, ix));
#else
- const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(X_RATIO) * (x + out_x);
- const ACCUMULATOR_TYPE iy = TO_ACCUMULATOR_TYPE(Y_RATIO) * y;
+ const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(SCALES[4]) * (x + out_x);
+ const ACCUMULATOR_TYPE iy = TO_ACCUMULATOR_TYPE(SCALES[3]) * y;
const int top_y_index = (int)(floor(iy));
const int bottom_y_index = min((int)ceil(iy), INPUT0_SIZE_Y - 1);
#endif
}
+inline int FUNC(get_nearest_val)(float num, bool is_downsample)
+{
+#if defined(NEAREST_ROUND_PREFER_FLOOR)
+ return (num == (int)num + 0.5f) ? (int)floor(num) : (int)round(num);
+#elif defined(NEAREST_ROUND_PREFER_CEIL)
+ return (int)round(num);
+#elif defined(NEAREST_FLOOR)
+ return (int)floor(num);
+#elif defined(NEAREST_CEIL)
+ return (int)ceil(num);
+#elif defined(NEAREST_SIMPLE)
+ return is_downsample ? (int)ceil(num) : (int)num;
+#else
+#error [clDNN resample_ref.cl]: nearest mode - not supported
+#endif
+}
+
+inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
+{
+#if defined(COORD_TRANS_MODE_HALF_PIXEL)
+ return (num + 0.5f) * scale - 0.5f;
+#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
+ return (length_resized > 1) ? (num + 0.5f) * scale - 0.5f : 0.f;
+#elif defined(COORD_TRANS_MODE_ASYMMETRIC)
+ return num * scale;
+#elif defined(COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN)
+ return (num + 0.5f) * scale;
+#elif defined(COORD_TRANS_MODE_ALIGN_CORNERS)
+ return (length_resized != 1) ? num * (length_original - 1) / (length_resized - 1) : 0.f;
+#else
+#error [clDNN resample_ref.cl]: coordinate transformation mode - not supported
+#endif
+}
+
+inline void FUNC(get_cubic_coeff)(float* cubic_coef, float coord, float coef)
+{
+ float abs_num = fabs(coord);
+ cubic_coef[0] = coef * (abs_num - 1.0) * (abs_num - 1.0) * abs_num;
+ cubic_coef[1] = ((coef + 2.0) * abs_num - (coef + 3.0)) * abs_num * abs_num + 1.0;
+ cubic_coef[2] = (((-coef - 2.0) * abs_num + (2.0 * coef + 3.0)) * abs_num - coef) * abs_num;
+ cubic_coef[3] = -coef * abs_num * abs_num * (abs_num - 1.0);
+}
#define TRIANGLE_COEFF(x) (ACCUMULATOR_MAX_FUNC(ACCUMULATOR_VAL_ZERO, ACCUMULATOR_VAL_ONE - ACCUMULATOR_ABS_FUNC(x)))
#define unroll_for __attribute__((opencl_unroll_hint)) for
#endif
)
{
+ const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
+ const int out_size[5] = { OUTPUT_BATCH_NUM, OUTPUT_FEATURE_NUM, OUTPUT_SIZE_Z, OUTPUT_SIZE_Y, OUTPUT_SIZE_X };
#if defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
typedef MAKE_VECTOR_TYPE(INPUT0_TYPE, PACK_SIZE) in_pack_t;
typedef MAKE_VECTOR_TYPE(OUTPUT_TYPE, PACK_SIZE) out_pack_t;
- const int ox = get_global_id(0);
+ int out_coords[5];
+ out_coords[4] = get_global_id(0);
#if OUTPUT_DIMS <= 4
- const int oy = get_global_id(1);
- const int oz = 0;
-#else
- const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y;
- const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+ out_coords[3] = get_global_id(1);
+ out_coords[2] = 0;
+#else // OUTPUT_DIMS <= 4
+ out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y;
+ out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+#endif // OUTPUT_DIMS <= 4
+ out_coords[1] = ((int)get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM;
+ out_coords[0] = ((int)get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM;
+ int in_coords[5];
+ bool isOutOfBounds = false;
+ unroll_for (int i = 0; i < 5; ++i) {
+ const float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]);
+ const int nearest_pixel = FUNC_CALL(get_nearest_val)(orig_coord, SCALES[i] > 1) - PADS_BEGIN[i];
+ in_coords[i] = max(-PADS_BEGIN[0], min(nearest_pixel, in_size[i] + PADS_END[i] - 1));
+#if PADDING_USED == 1
+ if (in_coords[i] < 0 || in_coords[i] >= in_size[i])
+ isOutOfBounds = true;
#endif
- const int feature = ((int)get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM;
- const int batch = ((int)get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM;
- const int ix = floor(ox * X_RATIO);
- const int iy = floor(oy * Y_RATIO);
- const int iz = floor(oz * Z_RATIO);
+ }
- uint input_idx = FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix);
- uint output_idx = FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox);
+ uint input_idx = FUNC_CALL(get_input_index)(in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4]);
+ uint output_idx = FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4]);
in_pack_t interp_val_pack = ((const __global in_pack_t*)(input + input_idx))[0];
out_pack_t res;
unroll_for (uint pi = 0; pi < PACK_SIZE; ++pi) {
INPUT0_TYPE interp_val = interp_val_pack[pi];
+#if PADDING_USED == 1
+ if (isOutOfBounds)
+ interp_val = INPUT0_VAL_ZERO;
+#endif
#if HAS_FUSED_OPS
- #define OF_ID (feature + pi)
+ #define OF_ID (out_coords[1] + pi)
FUSED_OPS;
res[pi] = FUSED_OPS_RESULT;
- #else
+ #else // HAS_FUSED_OPS
res[pi] = ACTIVATION(interp_val, ACTIVATION_PARAMS);
- #endif
+ #endif // HAS_FUSED_OPS
}
((__global out_pack_t*)(output + output_idx))[0] = res;
-#elif defined(SAMPLE_TYPE_NEAREST)
- const int ox = get_global_id(0);
+#elif defined(SAMPLE_TYPE_NEAREST) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+ int out_coords[5];
+ out_coords[4] = get_global_id(0);
#if OUTPUT_DIMS <= 4
- const int oy = get_global_id(1);
- const int oz = 0;
-#else
- const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y;
- const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+ out_coords[3] = get_global_id(1);
+ out_coords[2] = 0;
+#else // OUTPUT_DIMS <= 4
+ out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y;
+ out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+#endif // OUTPUT_DIMS <= 4
+ out_coords[1] = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
+ out_coords[0] = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
+ int in_coords[5];
+ bool isOutOfBounds = false;
+ unroll_for (int i = 0; i < 5; ++i) {
+ const float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]);
+ int nearest_pixel = FUNC_CALL(get_nearest_val)(orig_coord, SCALES[i] > 1) - PADS_BEGIN[i];
+ in_coords[i] = max(-PADS_BEGIN[i], min(nearest_pixel, in_size[i] + PADS_END[i] - 1));
+#if PADDING_USED == 1
+ if (in_coords[i] < 0 || in_coords[i] >= in_size[i])
+ isOutOfBounds = true;
+#endif
+ }
+ INPUT0_TYPE interp_val = input[FUNC_CALL(get_input_index)(in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4])];
+#if PADDING_USED == 1
+ if (isOutOfBounds)
+ interp_val = INPUT0_VAL_ZERO;
#endif
- const int feature = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
- const int batch = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
- const int ix = floor(ox * X_RATIO);
- const int iy = floor(oy * Y_RATIO);
- const int iz = floor(oz * Z_RATIO);
-
- INPUT0_TYPE interp_val = input[FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix)];
#if HAS_FUSED_OPS
- #define OF_ID (feature)
+ #define OF_ID (out_coords[1])
FUSED_OPS;
OUTPUT_TYPE res = FUSED_OPS_RESULT;
-#else
+#else // HAS_FUSED_OPS
OUTPUT_TYPE res = ACTIVATION(interp_val, ACTIVATION_PARAMS);
+#endif // HAS_FUSED_OPS
+ output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res;
+#elif defined(SAMPLE_TYPE_CUBIC) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+ int out_coords[5];
+ out_coords[4] = get_global_id(0);
+#if OUTPUT_DIMS <= 4
+ out_coords[3] = get_global_id(1);
+ out_coords[2] = 0;
+#else // OUTPUT_DIMS <= 4
+ out_coords[3] = (int)get_global_id(1) % OUTPUT_SIZE_Y;
+ out_coords[2] = (int)get_global_id(1) / OUTPUT_SIZE_Y;
+#endif // OUTPUT_DIMS <= 4
+ out_coords[1] = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
+ out_coords[0] = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
+ int in_coords[5];
+ float cubic_coeff[5][4];
+ unroll_for (int i = 0; i < 5; ++i) {
+ float orig_coord = FUNC_CALL(get_original_coordinate)(out_coords[i], SCALES[i], out_size[i], in_size[i] + PADS_BEGIN[i] + PADS_END[i]) - PADS_BEGIN[i];
+ in_coords[i] = floor(orig_coord);
+ orig_coord = (orig_coord - in_coords[i]) * AXES_USED[i];
+ FUNC_CALL(get_cubic_coeff)(cubic_coeff[i], orig_coord, CUBE_COEFF);
+ }
+
+ INPUT0_TYPE interp_val = INPUT0_VAL_ZERO;
+ int index[5];
+ unroll_for (index[0] = 0; index[0] <= 3; ++index[0]) {
+ unroll_for (index[1] = 0; index[1] <= 3; ++index[1]) {
+ unroll_for (index[2] = 0; index[2] <= 3; ++index[2]) {
+ unroll_for (index[3] = 0; index[3] <= 3; ++index[3]) {
+ unroll_for (index[4] = 0; index[4] <= 3; ++index[4]) {
+ int coords_sum[5] = { in_coords[0], in_coords[1], in_coords[2], in_coords[3], in_coords[4] };
+ float coeff_prod = 1.0f;
+ bool isOutOfBounds = false;
+ unroll_for (int i = 0; i < 5; ++i) {
+ coords_sum[i] = max(-PADS_BEGIN[i], min(in_coords[i] + index[i] - 1, PADS_END[i] + in_size[i] - 1));
+#if PADDING_USED == 1
+ if (coords_sum[i] < 0 || coords_sum[i] >= in_size[i])
+ isOutOfBounds = true;
+#endif
+ coeff_prod *= cubic_coeff[i][index[i]];
+ }
+#if PADDING_USED == 1
+ if (!isOutOfBounds)
+#endif
+ interp_val += coeff_prod * input[FUNC_CALL(get_input_index)(coords_sum[0], coords_sum[1], coords_sum[2], coords_sum[3], coords_sum[4])];
+ }
+ }
+ }
+ }
+ }
+
+#if HAS_FUSED_OPS
+ #define OF_ID (out_coords[1])
+ FUSED_OPS;
+ OUTPUT_TYPE res = FUSED_OPS_RESULT;
+#else // HAS_FUSED_OPS
+ OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
+#endif // HAS_FUSED_OPS
+ output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res;
+#elif defined(SAMPLE_TYPE_LINEAR_ONNX) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+ const int ox = get_global_id(0);
+ const int oy = get_global_id(1);
+ const int feature = 0;
+ const int batch = get_global_id(2);
+ const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3];
+ const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4];
+ const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X);
+ const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y);
+
+#ifdef LEFTOVERS
+ if (ox >= OUTPUT_SIZE_X)
+ return;
#endif
- output[FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox)] = res;
-#elif defined(SAMPLE_TYPE_INTERP)
+ float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
+ float in_x = fmax(0, fmin(ix, PADDED_X - 1));
+ int in_y1 = min((int)in_y, PADDED_Y - 1);
+ int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
+ int in_x1 = min((int)in_x, PADDED_X - 1);
+ int in_x2 = min(in_x1 + 1, PADDED_X - 1);
+
+ const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
+ const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
+ const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
+ const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
+#if PADDING_USED == 1
+ in_y1 -= PADS_BEGIN[3];
+ in_y2 -= PADS_BEGIN[3];
+ in_x1 -= PADS_BEGIN[4];
+ in_x2 -= PADS_BEGIN[4];
+
+ bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
+ bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
+ bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
+ bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
+#endif
+ unroll_for(int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) {
+ INPUT0_TYPE top_left = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x1)];
+ INPUT0_TYPE top_right = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x2)];
+ INPUT0_TYPE bottom_left = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x1)];
+ INPUT0_TYPE bottom_right = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x2)];
+#if PADDING_USED == 1
+ if (tlOutOfBounds)
+ top_left = INPUT0_VAL_ZERO;
+ if (trOutOfBounds)
+ top_right = INPUT0_VAL_ZERO;
+ if (blOutOfBounds)
+ bottom_left = INPUT0_VAL_ZERO;
+ if (brOutOfBounds)
+ bottom_right = INPUT0_VAL_ZERO;
+#endif
+
+ ACCUMULATOR_TYPE interp_val = TO_ACCUMULATOR_TYPE(dx2 * dy2 * top_left) +
+ TO_ACCUMULATOR_TYPE(dx1 * dy2 * top_right) +
+ TO_ACCUMULATOR_TYPE(dx2 * dy1 * bottom_left) +
+ TO_ACCUMULATOR_TYPE(dx1 * dy1 * bottom_right);
+
+#if HAS_FUSED_OPS
+ #define OF_ID (in_f)
+ FUSED_OPS;
+ OUTPUT_TYPE res = FUSED_OPS_RESULT;
+#else
+ OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
+#endif
+ output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res;
+ }
+#elif defined(SAMPLE_TYPE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
const int ox = get_global_id(0);
const int oy = get_global_id(1);
const int feature = 0;
const int batch = get_global_id(2);
- const float ix = X_RATIO * ox;
- const float iy = Y_RATIO * oy;
+ const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], OUTPUT_SIZE_X, in_size[4]);
+ const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], OUTPUT_SIZE_Y, in_size[3]);
#ifdef LEFTOVERS
if (ox >= OUTPUT_SIZE_X)
#endif
const int top_y_index = (int)(floor(iy));
- const int bottom_y_index = min((int)ceil(iy), INPUT0_SIZE_Y - 1);
+ const int bottom_y_index = min((int)ceil(iy), in_size[3] - 1);
const int left_x_index = (int)(floor(ix));
- const int right_x_index = min((int)ceil(ix), INPUT0_SIZE_X - 1);
+ const int right_x_index = min((int)ceil(ix), in_size[4] - 1);
const ACCUMULATOR_TYPE dx = TO_ACCUMULATOR_TYPE(ix - left_x_index);
const ACCUMULATOR_TYPE dy = TO_ACCUMULATOR_TYPE(iy - top_y_index);
FUSED_OPS;
OUTPUT_TYPE res = FUSED_OPS_RESULT;
#else
- OUTPUT_TYPE res = TO_OUTPUT_TYPE(ACTIVATION(interp_val, ACTIVATION_PARAMS));
+ OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
#endif
output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res;
}
-#elif defined(SAMPLE_TYPE_CAFFE_INTERP)
+#elif defined(SAMPLE_TYPE_CAFFE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
const int ox = (int)get_global_id(0) % OUTPUT_SIZE_X;
const int oy = (int)get_global_id(0) / OUTPUT_SIZE_X;
const int feature_block_nun = get_global_id(1);
const int batch = (int)get_global_id(2) % OUTPUT_BATCH_NUM;
const int oz = (int)get_global_id(2) / OUTPUT_BATCH_NUM;
#endif
+ const int PADDED_B = in_size[0] + PADS_BEGIN[0] + PADS_END[0];
+ const int PADDED_F = in_size[1] + PADS_BEGIN[1] + PADS_END[1];
+ const int PADDED_Z = in_size[2] + PADS_BEGIN[2] + PADS_END[2];
+ const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3];
+ const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4];
- const ACCUMULATOR_TYPE ix = ox * X_RATIO + X_RATIO_HALF - 0.5f;
- const ACCUMULATOR_TYPE iy = oy * Y_RATIO + Y_RATIO_HALF - 0.5f;
- const ACCUMULATOR_TYPE iz = oz * Z_RATIO + Z_RATIO_HALF - 0.5f;
+ ACCUMULATOR_TYPE i_b = AXES_USED[0] ? FUNC_CALL(get_original_coordinate)(batch, SCALES[0], out_size[0], PADDED_B) : batch;
+ ACCUMULATOR_TYPE i_f = AXES_USED[1] ? FUNC_CALL(get_original_coordinate)(feature, SCALES[1], out_size[1], PADDED_F) : feature;
+ ACCUMULATOR_TYPE i_x = AXES_USED[4] ? FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X) : ox;
+ ACCUMULATOR_TYPE i_y = AXES_USED[3] ? FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y) : oy;
+ ACCUMULATOR_TYPE i_z = AXES_USED[2] ? FUNC_CALL(get_original_coordinate)(oz, SCALES[2], out_size[2], PADDED_Z) : oz;
+#if PADDING_USED == 1
+ i_b -= PADS_BEGIN[0];
+ i_f -= PADS_BEGIN[1];
+ i_z -= PADS_BEGIN[2];
+ i_y -= PADS_BEGIN[3];
+ i_x -= PADS_BEGIN[4];
+#endif
- const int ix_r = (int)ix;
- const int iy_r = (int)iy;
- const int iz_r = (int)iz;
+ const int ib_r = (int)i_b;
+ const int if_r = (int)i_f;
+ const int ix_r = (int)i_x;
+ const int iy_r = (int)i_y;
+ const int iz_r = (int)i_z;
#if ANTIALIAS == 1
- const ACCUMULATOR_TYPE ax = 1.0f / X_RATIO;
- const ACCUMULATOR_TYPE ay = 1.0f / Y_RATIO;
- const ACCUMULATOR_TYPE az = 1.0f / Z_RATIO;
+ const ACCUMULATOR_TYPE ab = 1.0f / SCALES[0];
+ const ACCUMULATOR_TYPE af = 1.0f / SCALES[1];
+ const ACCUMULATOR_TYPE ax = 1.0f / SCALES[4];
+ const ACCUMULATOR_TYPE ay = 1.0f / SCALES[3];
+ const ACCUMULATOR_TYPE az = 1.0f / SCALES[2];
#else
+ const ACCUMULATOR_TYPE ab = 1.0f;
+ const ACCUMULATOR_TYPE af = 1.0f;
const ACCUMULATOR_TYPE ax = 1.0f;
const ACCUMULATOR_TYPE ay = 1.0f;
const ACCUMULATOR_TYPE az = 1.0f;
#endif
- const int rx = (X_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ax);
- const int ry = (Y_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ay);
- const int rz = (Z_RATIO < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / az);
-
- ACCUMULATOR_TYPE sum[FEATURE_BLOCK_SIZE];
- for (int i = 0; i < FEATURE_BLOCK_SIZE; i++)
- sum[i] = 0;
-
- ACCUMULATOR_TYPE wsum = 0;
-
- int const y_init = max(0, iy_r - ry);
- int const x_init = max(0, ix_r - rx);
- int const z_init = max(0, iz_r - rz);
- int const y_max = min(INPUT0_SIZE_Y, iy_r + ry + 1);
- int const x_max = min(INPUT0_SIZE_X, ix_r + rx + 1);
- int const z_max = min(INPUT0_SIZE_Z, iz_r + rz + 1);
-
- unroll_for(int z = z_init; z < z_max; z++) {
- unroll_for(int y = y_init; y < y_max; y++) {
- unroll_for(int x = x_init; x < x_max; x++) {
- ACCUMULATOR_TYPE dx = ix - x;
- ACCUMULATOR_TYPE dy = iy - y;
- ACCUMULATOR_TYPE dz = iz - z;
-#if ANTIALIAS == 1
- ACCUMULATOR_TYPE w = ax * TRIANGLE_COEFF(ax * dx) * ay * TRIANGLE_COEFF(ay * dy) * az * triangleCoeff(az * dz);
+ const int rb = (SCALES[0] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ab);
+ const int rf = (SCALES[1] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / af);
+ const int rx = (SCALES[4] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ax);
+ const int ry = (SCALES[3] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / ay);
+ const int rz = (SCALES[2] < 1.0f) ? 2 : (int)ceil(TO_ACCUMULATOR_TYPE(KERNEL_W) / az);
+
+ int const b_init = max(-PADS_BEGIN[0], ib_r - rb);
+ int const f_init = max(-PADS_BEGIN[1], if_r - rf);
+ int const y_init = max(-PADS_BEGIN[3], iy_r - ry);
+ int const x_init = max(-PADS_BEGIN[4], ix_r - rx);
+ int const z_init = max(-PADS_BEGIN[2], iz_r - rz);
+ int const b_max = min(PADS_END[0] + INPUT0_BATCH_NUM, ib_r + rb + 1);
+ int const f_max = min(PADS_END[1] + INPUT0_FEATURE_NUM, if_r + rf + 1);
+ int const y_max = min(PADS_END[3] + INPUT0_SIZE_Y, iy_r + ry + 1);
+ int const x_max = min(PADS_END[4] + INPUT0_SIZE_X, ix_r + rx + 1);
+ int const z_max = min(PADS_END[2] + INPUT0_SIZE_Z, iz_r + rz + 1);
+#ifndef LEFTOVERS
+ const int fp_max = FEATURE_BLOCK_SIZE;
#else
- ACCUMULATOR_TYPE w = TRIANGLE_COEFF(dx) * TRIANGLE_COEFF(dy) * TRIANGLE_COEFF(dz);
+ const int fp_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER);
#endif
+ ACCUMULATOR_TYPE sum[fp_max] = {0};
+ ACCUMULATOR_TYPE wsum[fp_max] = {0};
-#ifndef LEFTOVERS
- unroll_for(int f = 0; f < FEATURE_BLOCK_SIZE; f++) {
+ unroll_for(int b = b_init; b < b_max; b++) {
+ unroll_for(int f = f_init; f < f_max; f++) {
+ unroll_for(int z = z_init; z < z_max; z++) {
+ unroll_for(int y = y_init; y < y_max; y++) {
+ unroll_for(int x = x_init; x < x_max; x++) {
+ unroll_for(int fp = 0; fp < fp_max; fp++) {
+#if PADDING_USED == 1
+ bool isOutOfBounds = b < 0 || f < 0 || z < 0 || y < 0 || x < 0 ||
+ b >= in_size[0] || f >= in_size[1] || z >= in_size[2] ||
+ y >= in_size[3] || x >= in_size[4];
+#endif
+
+ ACCUMULATOR_TYPE db = i_b - b;
+ ACCUMULATOR_TYPE df = i_f - f;
+ ACCUMULATOR_TYPE dx = i_x - x;
+ ACCUMULATOR_TYPE dy = i_y - y;
+ ACCUMULATOR_TYPE dz = i_z - z;
+#if ANTIALIAS == 1
+ ACCUMULATOR_TYPE w = ab * TRIANGLE_COEFF(ab * db) *
+ af * TRIANGLE_COEFF(af * df) *
+ ax * TRIANGLE_COEFF(ax * dx) *
+ ay * TRIANGLE_COEFF(ay * dy) *
+ az * TRIANGLE_COEFF(az * dz);
#else
- const int f_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER);
- unroll_for(int f = 0; f < f_max; f++) {
+ ACCUMULATOR_TYPE w = TRIANGLE_COEFF(db) *
+ TRIANGLE_COEFF(df) *
+ TRIANGLE_COEFF(dx) *
+ TRIANGLE_COEFF(dy) *
+ TRIANGLE_COEFF(dz);
#endif
- if (w != 0)
- sum[f] += w * TO_ACCUMULATOR_TYPE(input[FUNC_CALL(get_input_index)(batch, feature + f, z, y, x)]);
+ if (w != 0 && f + fp < INPUT0_FEATURE_NUM) {
+ wsum[fp] += w;
+#if PADDING_USED == 1
+ if (!isOutOfBounds)
+#endif
+ sum[fp] += w * TO_ACCUMULATOR_TYPE(input[FUNC_CALL(get_input_index)(b, f + fp, z, y, x)]);
+ }
+ }
+ }
}
- wsum += w;
}
}
}
-#ifndef LEFTOVERS
- unroll_for (int f = 0; f < FEATURE_BLOCK_SIZE; f++) {
-#else
- const int f_max = min(FEATURE_BLOCK_SIZE, FEATURE_LEFTOVER);
- unroll_for (int f = 0; f < f_max; f++) {
-#endif
-
- ACCUMULATOR_TYPE interp_val = (wsum == 0) ? 0 : (sum[f] / wsum);
+ unroll_for (int f = 0; f < fp_max; f++) {
+ ACCUMULATOR_TYPE interp_val = (wsum[f] == 0) ? ACCUMULATOR_VAL_ZERO : (sum[f] / wsum[f]);
#if HAS_FUSED_OPS
#define OF_ID (feature + f)
FUSED_OPS;
OUTPUT_TYPE res = FUSED_OPS_RESULT;
#else
- OUTPUT_TYPE res = TO_OUTPUT_TYPE(ACTIVATION(interp_val, ACTIVATION_PARAMS));
+ OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
#endif
output[FUNC_CALL(get_output_index)(batch, feature + f, oz, oy, ox)] = res;
}
-#endif
+#endif // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
}
#undef unroll_for
case ResampleType::NEAREST_NEIGHBOR: return "SAMPLE_TYPE_NEAREST";
case ResampleType::BILINEAR_INTERP: return "SAMPLE_TYPE_INTERP";
case ResampleType::CAFFE_BILINEAR_INTERP: return "SAMPLE_TYPE_CAFFE_INTERP";
+ case ResampleType::CUBIC: return "SAMPLE_TYPE_CUBIC";
+ case ResampleType::LINEAR_ONNX: return "SAMPLE_TYPE_LINEAR_ONNX";
+ default: return "";
+ }
+}
+
+std::string toString(CoordinateTransformationMode mode) {
+ switch (mode) {
+ case CoordinateTransformationMode::HALF_PIXEL: return "COORD_TRANS_MODE_HALF_PIXEL";
+ case CoordinateTransformationMode::PYTORCH_HALF_PIXEL: return "COORD_TRANS_MODE_PYTORCH_HALF_PIXEL";
+ case CoordinateTransformationMode::ASYMMETRIC: return "COORD_TRANS_MODE_ASYMMETRIC";
+ case CoordinateTransformationMode::TF_HALF_PIXEL_FOR_NN: return "COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN";
+ case CoordinateTransformationMode::ALIGN_CORNERS: return "COORD_TRANS_MODE_ALIGN_CORNERS";
+ default: return "";
+ }
+}
+
+std::string toString(NearestMode mode) {
+ switch (mode) {
+ case NearestMode::ROUND_PREFER_FLOOR: return "NEAREST_ROUND_PREFER_FLOOR";
+ case NearestMode::ROUND_PREFER_CEIL: return "NEAREST_ROUND_PREFER_CEIL";
+ case NearestMode::FLOOR: return "NEAREST_FLOOR";
+ case NearestMode::CEIL: return "NEAREST_CEIL";
+ case NearestMode::SIMPLE: return "NEAREST_SIMPLE";
default: return "";
}
}
std::string toString(GatherAxis a);
std::string toString(ScatterUpdateAxis a);
std::string toString(ResampleType type);
+std::string toString(CoordinateTransformationMode mode);
+std::string toString(NearestMode mode);
std::string toString(const BorderType type);
std::string toString(const Tensor::Dim& dim);
std::string toString(const DataTensor& tensor);
return kernel_selector::sample_type::CAFFE_BILINEAR_INTERP;
case resample_type::bilinear:
return kernel_selector::sample_type::BILINEAR_INTERP;
+ case resample_type::cubic:
+ return kernel_selector::sample_type::CUBIC;
+ case resample_type::linear_onnx:
+ return kernel_selector::sample_type::LINEAR_ONNX;
default:
return kernel_selector::sample_type::NEAREST_NEIGHBOR;
}
}
+
+inline kernel_selector::coordinate_transformation_mode convert_to_coord_transform_mode(coordinate_transformation_mode mode) {
+ switch (mode) {
+ case coordinate_transformation_mode::half_pixel:
+ return kernel_selector::coordinate_transformation_mode::HALF_PIXEL;
+ case coordinate_transformation_mode::pytorch_half_pixel:
+ return kernel_selector::coordinate_transformation_mode::PYTORCH_HALF_PIXEL;
+ case coordinate_transformation_mode::asymmetric:
+ return kernel_selector::coordinate_transformation_mode::ASYMMETRIC;
+ case coordinate_transformation_mode::tf_half_pixel_for_nn:
+ return kernel_selector::coordinate_transformation_mode::TF_HALF_PIXEL_FOR_NN;
+ case coordinate_transformation_mode::align_corners:
+ return kernel_selector::coordinate_transformation_mode::ALIGN_CORNERS;
+ default:
+ return kernel_selector::coordinate_transformation_mode::HALF_PIXEL;
+ }
+}
+
+inline kernel_selector::nearest_mode convert_to_nearest_mode(nearest_mode mode) {
+ switch (mode) {
+ case nearest_mode::round_prefer_floor:
+ return kernel_selector::nearest_mode::ROUND_PREFER_FLOOR;
+ case nearest_mode::round_prefer_ceil:
+ return kernel_selector::nearest_mode::ROUND_PREFER_CEIL;
+ case nearest_mode::floor:
+ return kernel_selector::nearest_mode::FLOOR;
+ case nearest_mode::ceil:
+ return kernel_selector::nearest_mode::CEIL;
+ case nearest_mode::simple:
+ return kernel_selector::nearest_mode::SIMPLE;
+ default:
+ return kernel_selector::nearest_mode::ROUND_PREFER_FLOOR;
+ }
+}
+
+inline kernel_selector::shape_calculation_mode convert_to_shape_calculation_mode(shape_calculation_mode mode) {
+ switch (mode) {
+ case shape_calculation_mode::sizes:
+ return kernel_selector::shape_calculation_mode::SIZES;
+ case shape_calculation_mode::scales:
+ return kernel_selector::shape_calculation_mode::SCALES;
+ default:
+ return kernel_selector::shape_calculation_mode::SIZES;
+ }
+}
+
+inline kernel_selector::interpolate_axis convert_axis(resample::resample_axis axis) {
+ switch (axis) {
+ case resample::along_x:
+ return kernel_selector::interpolate_axis::X;
+ case resample::along_y:
+ return kernel_selector::interpolate_axis::Y;
+ case resample::along_z:
+ return kernel_selector::interpolate_axis::Z;
+ case resample::along_w:
+ return kernel_selector::interpolate_axis::W;
+ case resample::along_f:
+ return kernel_selector::interpolate_axis::FEATURE;
+ case resample::along_b:
+ return kernel_selector::interpolate_axis::BATCH;
+ default:
+ return kernel_selector::interpolate_axis::BATCH;
+ }
+}
} // namespace
struct resample_gpu : typed_primitive_gpu_impl<resample> {
if (primitive->with_activation)
convert_activation_func_params(primitive, us_params.activations);
+ size_t dimsNum = arg.get_output_layout().format.dimension();
us_params.resampleType = convert_to_sample_type(primitive->operation_type);
+ us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode);
+ us_params.coordTransMode = convert_to_coord_transform_mode(primitive->coord_trans_mode);
+ us_params.shapeCalculationMode = convert_to_shape_calculation_mode(primitive->shape_calc_mode);
+ us_params.antialias = primitive->antialias;
+ us_params.cube_coeff = primitive->cube_coeff;
+ us_params.pads_begin = primitive->pads_begin.empty() ? std::vector<int32_t>(dimsNum, 0) : primitive->pads_begin;
+ us_params.pads_end = primitive->pads_end.empty() ? std::vector<int32_t>(dimsNum, 0) : primitive->pads_end;
+ for (const auto& it : primitive->axesAndScales) {
+ us_params.axesAndScales[convert_axis(it.first)] = it.second;
+ }
if (primitive->operation_type == resample_type::bilinear) {
- us_params.pad_begin = primitive->pad_begin;
- us_params.pad_end = primitive->pad_end;
us_params.align_corners = primitive->align_corners;
}
using tile_axis = kernel_selector::TileAxis;
using tuning_mode = kernel_selector::TuningMode;
using sample_type = kernel_selector::ResampleType;
+using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode;
+using nearest_mode = kernel_selector::NearestMode;
+using shape_calculation_mode = kernel_selector::ShapeCalculationMode;
+using interpolate_axis = kernel_selector::InterpolateAxis;
using border_type = kernel_selector::BorderType;
using gather_axis = kernel_selector::GatherAxis;
using scatter_update_axis = kernel_selector::ScatterUpdateAxis;
resample_info.add("resample_type:", "bilinear_interp");
else if (desc->operation_type == resample_type::caffe_bilinear)
resample_info.add("resample_type:", "caffe_bilinear_interp");
+ else if (desc->operation_type == resample_type::cubic)
+ resample_info.add("resample_type:", "cubic");
else
resample_info.add("resample_type:", "not supported sample type");
+ if (desc->shape_calc_mode == shape_calculation_mode::sizes)
+ resample_info.add("shape_calculation_mode:", "sizes");
+ else
+ resample_info.add("shape_calculation_mode:", "scales");
+
+ if (desc->shape_calc_mode == shape_calculation_mode::scales) {
+ std::string axesAndScalesDump;
+ std::string delim = "";
+ for (auto& it : desc->axesAndScales) {
+ axesAndScalesDump += delim;
+ delim = ", ";
+ if (it.first == resample::resample_axis::along_b)
+ axesAndScalesDump += "b: ";
+ else if (it.first == resample::resample_axis::along_f)
+ axesAndScalesDump += "f: ";
+ else if (it.first == resample::resample_axis::along_x)
+ axesAndScalesDump += "x: ";
+ else if (it.first == resample::resample_axis::along_y)
+ axesAndScalesDump += "y: ";
+ else if (it.first == resample::resample_axis::along_z)
+ axesAndScalesDump += "z: ";
+ else
+ axesAndScalesDump += "w: ";
+ axesAndScalesDump += std::to_string(it.second);
+ }
+ resample_info.add("scales:", axesAndScalesDump);
+ }
+
+ if (desc->coord_trans_mode == coordinate_transformation_mode::half_pixel)
+ resample_info.add("coordinate_transformation_mode:", "half_pixel");
+ else if (desc->coord_trans_mode == coordinate_transformation_mode::pytorch_half_pixel)
+ resample_info.add("coordinate_transformation_mode:", "pytorch_half_pixel");
+ else if (desc->coord_trans_mode == coordinate_transformation_mode::tf_half_pixel_for_nn)
+ resample_info.add("coordinate_transformation_mode:", "tf_half_pixel_for_nn");
+ else if (desc->coord_trans_mode == coordinate_transformation_mode::align_corners)
+ resample_info.add("coordinate_transformation_mode:", "align_corners");
+ else
+ resample_info.add("coordinate_transformation_mode:", "asymmetric");
+
+ if (desc->round_mode == nearest_mode::round_prefer_floor)
+ resample_info.add("nearest_mode:", "round_prefer_floor");
+ if (desc->round_mode == nearest_mode::round_prefer_ceil)
+ resample_info.add("nearest_mode:", "round_prefer_ceil");
+ if (desc->round_mode == nearest_mode::floor)
+ resample_info.add("nearest_mode:", "floor");
+ if (desc->round_mode == nearest_mode::ceil)
+ resample_info.add("nearest_mode:", "ceil");
+ else
+ resample_info.add("nearest_mode:", "simple");
+
resample_info.add("output_size", desc->output_size);
resample_info.add("with activation", desc->with_activation);
resample_info.add("output padding lower size", desc->output_padding.lower_size());
resample_info.add("output padding upper size", desc->output_padding.upper_size());
if (desc->operation_type == resample_type::bilinear) {
- resample_info.add("pad_begin", desc->pad_begin);
- resample_info.add("pad_end", desc->pad_end);
resample_info.add("align_corners", desc->align_corners);
}
12.f,12.f, 9.f, 9.f, -17.f, -17.f,
};
- for (int i = 0; i < 2; ++i) { //B
- for (int j = 0; j < 2; ++j) { //F
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 6; ++l) { //X
+ for (int i = 0; i < 2; ++i) { // B
+ for (int j = 0; j < 2; ++j) { // F
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id]));
}
3.f, 3.25f, 3.75f, 4.f,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 4; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 4; ++l) { // X
auto linear_id = l + k * 4;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
}
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(resample("upsampling", "input", output_size, 0, 0, 0, resample_type::bilinear));
+ topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear));
set_values(input, {
1.f, 2.f,
3.0f, 3.5f, 4.0f, 4.0f,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 4; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 4; ++l) { // X
auto linear_id = l + k * 4;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
}
topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(reorder("input_to_b_fs_yx_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32));
- topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, 0, 0, 0, resample_type::bilinear));
+ topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear));
topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32));
set_values(input, {
3.0f, 3.5f, 4.0f, 4.0f,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 4; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 4; ++l) { // X
auto linear_id = l + k * 4;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
}
topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(reorder("input_to_fs_b_yx_fsv32", "input", format::fs_b_yx_fsv32, data_types::f16));
- topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, 0, 0, 0, resample_type::bilinear));
+ topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear));
topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32));
set_values(input, {
3.0f, 3.5f, 4.0f, 4.0f,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 4; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 4; ++l) { // X
auto linear_id = l + k * 4;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
}
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(resample("upsampling", "input", output_size, 0, 0, 1, resample_type::bilinear));
+ topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 1, resample_type::bilinear));
set_values(input, {
1.f, 2.f,
3.000000f, 3.333333f, 3.666667f, 4.000000f
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 4; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 4; ++l) { // X
auto linear_id = l + k * 4;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
}
3.f, 3.f, 3.f, 4.f, 4.f,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 5; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 5; ++l) { // X
auto linear_id = l + k * 5;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
}
3, 3, 3, 4, 4,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 5; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 5; ++l) { // X
auto linear_id = l + k * 5;
EXPECT_EQ(answers[linear_id], output_ptr[linear_id]);
}
3.f, 3.f, 3.33333f, 3.66667f, 4.f, 4.f,
};
- for (int k = 0; k < 4; ++k) { //Y
- for (int l = 0; l < 6; ++l) { //X
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
auto linear_id = l + k * 6;
EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 5e-03F) << l << " " << k;
}
.smoke_params(data_types::i8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
.smoke_params(data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
), );
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+ nearest_mode nm = nearest_mode::ceil;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ float answers[96] = {
+ 0.f, 1.f, 1.f, 1.f,
+ 2.f, 3.f, 3.f, 3.f,
+ 2.f, 3.f, 3.f, 3.f,
+ 4.f, 5.f, 5.f, 5.f,
+ 4.f, 5.f, 5.f, 5.f,
+ 4.f, 5.f, 5.f, 5.f,
+
+ 6.f, 7.f, 7.f, 7.f,
+ 8.f, 9.f, 9.f, 9.f,
+ 8.f, 9.f, 9.f, 9.f,
+ 10.f, 11.f, 11.f, 11.f,
+ 10.f, 11.f, 11.f, 11.f,
+ 10.f, 11.f, 11.f, 11.f,
+
+ 12.f, 13.f, 13.f, 13.f,
+ 14.f, 15.f, 15.f, 15.f,
+ 14.f, 15.f, 15.f, 15.f,
+ 16.f, 17.f, 17.f, 17.f,
+ 16.f, 17.f, 17.f, 17.f,
+ 16.f, 17.f, 17.f, 17.f,
+
+ 18.f, 19.f, 19.f, 19.f,
+ 20.f, 21.f, 21.f, 21.f,
+ 20.f, 21.f, 21.f, 21.f,
+ 22.f, 23.f, 23.f, 23.f,
+ 22.f, 23.f, 23.f, 23.f,
+ 22.f, 23.f, 23.f, 23.f,
+ };
+
+ for (int i = 0; i < 2; ++i) { // B
+ for (int j = 0; j < 2; ++j) { // F
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
+ auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+ EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+ }
+ }
+ }
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+ nearest_mode nm = nearest_mode::round_prefer_floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ float answers[96] = {
+ 0.f, 0.f, 1.f, 1.f,
+ 0.f, 0.f, 1.f, 1.f,
+ 2.f, 2.f, 3.f, 3.f,
+ 2.f, 2.f, 3.f, 3.f,
+ 4.f, 4.f, 5.f, 5.f,
+ 4.f, 4.f, 5.f, 5.f,
+
+ 6.f, 6.f, 7.f, 7.f,
+ 6.f, 6.f, 7.f, 7.f,
+ 8.f, 8.f, 9.f, 9.f,
+ 8.f, 8.f, 9.f, 9.f,
+ 10.f, 10.f, 11.f, 11.f,
+ 10.f, 10.f, 11.f, 11.f,
+
+ 12.f, 12.f, 13.f, 13.f,
+ 12.f, 12.f, 13.f, 13.f,
+ 14.f, 14.f, 15.f, 15.f,
+ 14.f, 14.f, 15.f, 15.f,
+ 16.f, 16.f, 17.f, 17.f,
+ 16.f, 16.f, 17.f, 17.f,
+
+ 18.f, 18.f, 19.f, 19.f,
+ 18.f, 18.f, 19.f, 19.f,
+ 20.f, 20.f, 21.f, 21.f,
+ 20.f, 20.f, 21.f, 21.f,
+ 22.f, 22.f, 23.f, 23.f,
+ 22.f, 22.f, 23.f, 23.f,
+ };
+
+ for (int i = 0; i < 2; ++i) { // B
+ for (int j = 0; j < 2; ++j) { // F
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
+ auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+ EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+ }
+ }
+ }
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+ nearest_mode nm = nearest_mode::round_prefer_ceil;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ float answers[96] = {
+ 0.f, 0.f, 1.f, 1.f,
+ 0.f, 0.f, 1.f, 1.f,
+ 2.f, 2.f, 3.f, 3.f,
+ 2.f, 2.f, 3.f, 3.f,
+ 4.f, 4.f, 5.f, 5.f,
+ 4.f, 4.f, 5.f, 5.f,
+
+ 6.f, 6.f, 7.f, 7.f,
+ 6.f, 6.f, 7.f, 7.f,
+ 8.f, 8.f, 9.f, 9.f,
+ 8.f, 8.f, 9.f, 9.f,
+ 10.f, 10.f, 11.f, 11.f,
+ 10.f, 10.f, 11.f, 11.f,
+
+ 12.f, 12.f, 13.f, 13.f,
+ 12.f, 12.f, 13.f, 13.f,
+ 14.f, 14.f, 15.f, 15.f,
+ 14.f, 14.f, 15.f, 15.f,
+ 16.f, 16.f, 17.f, 17.f,
+ 16.f, 16.f, 17.f, 17.f,
+
+ 18.f, 18.f, 19.f, 19.f,
+ 18.f, 18.f, 19.f, 19.f,
+ 20.f, 20.f, 21.f, 21.f,
+ 20.f, 20.f, 21.f, 21.f,
+ 22.f, 22.f, 23.f, 23.f,
+ 22.f, 22.f, 23.f, 23.f,
+ };
+
+ for (int i = 0; i < 2; ++i) { // B
+ for (int j = 0; j < 2; ++j) { // F
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
+ auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+ EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+ }
+ }
+ }
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+ nearest_mode nm = nearest_mode::floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ float answers[96] = {
+ 0.f, 0.f, 0.f, 1.f,
+ 0.f, 0.f, 0.f, 1.f,
+ 0.f, 0.f, 0.f, 1.f,
+ 2.f, 2.f, 2.f, 3.f,
+ 2.f, 2.f, 2.f, 3.f,
+ 4.f, 4.f, 4.f, 5.f,
+
+ 6.f, 6.f, 6.f, 7.f,
+ 6.f, 6.f, 6.f, 7.f,
+ 6.f, 6.f, 6.f, 7.f,
+ 8.f, 8.f, 8.f, 9.f,
+ 8.f, 8.f, 8.f, 9.f,
+ 10.f, 10.f, 10.f, 11.f,
+
+ 12.f, 12.f, 12.f, 13.f,
+ 12.f, 12.f, 12.f, 13.f,
+ 12.f, 12.f, 12.f, 13.f,
+ 14.f, 14.f, 14.f, 15.f,
+ 14.f, 14.f, 14.f, 15.f,
+ 16.f, 16.f, 16.f, 17.f,
+
+ 18.f, 18.f, 18.f, 19.f,
+ 18.f, 18.f, 18.f, 19.f,
+ 18.f, 18.f, 18.f, 19.f,
+ 20.f, 20.f, 20.f, 21.f,
+ 20.f, 20.f, 20.f, 21.f,
+ 22.f, 22.f, 22.f, 23.f,
+ };
+
+ for (int i = 0; i < 2; ++i) { // B
+ for (int j = 0; j < 2; ++j) { // F
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
+ auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+ EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+ }
+ }
+ }
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+ nearest_mode nm = nearest_mode::simple;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ float answers[96] = {
+ 0.f, 0.f, 0.f, 1.f,
+ 0.f, 0.f, 0.f, 1.f,
+ 0.f, 0.f, 0.f, 1.f,
+ 2.f, 2.f, 2.f, 3.f,
+ 2.f, 2.f, 2.f, 3.f,
+ 4.f, 4.f, 4.f, 5.f,
+
+ 6.f, 6.f, 6.f, 7.f,
+ 6.f, 6.f, 6.f, 7.f,
+ 6.f, 6.f, 6.f, 7.f,
+ 8.f, 8.f, 8.f, 9.f,
+ 8.f, 8.f, 8.f, 9.f,
+ 10.f, 10.f, 10.f, 11.f,
+
+ 12.f, 12.f, 12.f, 13.f,
+ 12.f, 12.f, 12.f, 13.f,
+ 12.f, 12.f, 12.f, 13.f,
+ 14.f, 14.f, 14.f, 15.f,
+ 14.f, 14.f, 14.f, 15.f,
+ 16.f, 16.f, 16.f, 17.f,
+
+ 18.f, 18.f, 18.f, 19.f,
+ 18.f, 18.f, 18.f, 19.f,
+ 18.f, 18.f, 18.f, 19.f,
+ 20.f, 20.f, 20.f, 21.f,
+ 20.f, 20.f, 20.f, 21.f,
+ 22.f, 22.f, 22.f, 23.f,
+ };
+
+ for (int i = 0; i < 2; ++i) { // B
+ for (int j = 0; j < 2; ++j) { // F
+ for (int k = 0; k < 4; ++k) { // Y
+ for (int l = 0; l < 6; ++l) { // X
+ auto linear_id = l + k * 6 + j * 4 * 6 + i * 2 * 4 * 6;
+ EXPECT_TRUE(are_equal(answers[linear_id], output_ptr[linear_id])) << linear_id;
+ }
+ }
+ }
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 2;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel;
+ nearest_mode nm = nearest_mode::round_prefer_floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 0.f, 0.f, 1.f,
+ 4.f, 4.f, 5.f,
+
+ 6.f, 6.f, 7.f,
+ 10.f, 10.f, 11.f,
+
+ 12.f, 12.f, 13.f,
+ 16.f, 16.f, 17.f,
+
+ 18.f, 18.f, 19.f,
+ 22.f, 22.f, 23.f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 1;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::pytorch_half_pixel;
+ nearest_mode nm = nearest_mode::round_prefer_floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 0.f, 0.f, 1.f,
+ 6.f, 6.f, 7.f,
+
+ 12.f, 12.f, 13.f,
+ 18.f, 18.f, 19.f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 2;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::asymmetric;
+ nearest_mode nm = nearest_mode::round_prefer_floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 0.f, 1.f, 1.f,
+ 2.f, 3.f, 3.f,
+
+ 6.f, 7.f, 7.f,
+ 8.f, 9.f, 9.f,
+
+ 12.f, 13.f, 13.f,
+ 14.f, 15.f, 15.f,
+
+ 18.f, 19.f, 19.f,
+ 20.f, 21.f, 21.f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 2;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::tf_half_pixel_for_nn;
+ nearest_mode nm = nearest_mode::round_prefer_floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 2.f, 3.f, 3.f,
+ 4.f, 5.f, 5.f,
+
+ 8.f, 9.f, 9.f,
+ 10.f, 11.f, 11.f,
+
+ 14.f, 15.f, 15.f,
+ 16.f, 17.f, 17.f,
+
+ 20.f, 21.f, 21.f,
+ 22.f, 23.f, 23.f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 2;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::nearest;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::align_corners;
+ nearest_mode nm = nearest_mode::round_prefer_floor;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm, nm));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 0.f, 0.f, 1.f,
+ 4.f, 4.f, 5.f,
+
+ 6.f, 6.f, 7.f,
+ 10.f, 10.f, 11.f,
+
+ 12.f, 12.f, 13.f,
+ 16.f, 16.f, 17.f,
+
+ 18.f, 18.f, 19.f,
+ 22.f, 22.f, 23.f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_cubic) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 2;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::cubic;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 0.29600694f, 0.8828125f, 1.46961806f,
+ 3.53038194f, 4.1171875f, 4.70399306f,
+
+ 6.29600694f, 6.8828125f, 7.46961806f,
+ 9.53038194f, 10.1171875f, 10.70399306f,
+
+ 12.29600694f, 12.8828125f, 13.46961806f,
+ 15.53038194f, 16.1171875f, 16.70399306f,
+
+ 18.29600694f, 18.8828125f, 19.46961806f,
+ 21.53038194f, 22.1171875f, 22.70399306f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 1;
+ int f = 1;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::cubic;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+ set_values(input, {
+ 5.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 5.34722222f, 3.f, 0.65277778f,
+ 1.91319444f, 2.5f, 3.08680556f,
+ 3.91319444f, 4.5f, 5.08680556f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_linear) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 2;
+ int f = 2;
+ int y = 3;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 2;
+ x = 3;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::caffe_bilinear;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+ set_values(input, {
+ 0.f, 1.f, 2.f,
+ 3.f, 4.f, 5.f,
+ 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f,
+ 12.f, 13.f, 14.f,
+ 15.f, 16.f, 17.f,
+ 18.f, 19.f, 20.f,
+ 21.f, 22.f, 23.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 0.5f, 1.f, 1.5f,
+ 3.5f, 4.f, 4.5f,
+
+ 6.5f, 7.f, 7.5f,
+ 9.5f, 10.f, 10.5f,
+
+ 12.5f, 13.f, 13.5f,
+ 15.5f, 16.f, 16.5f,
+
+ 18.5f, 19.f, 19.5f,
+ 21.5f, 22.f, 22.5f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in2x2x3x2_linear_onnx) {
+ // Input : 2x2x3x2
+ // Output : 2x2x6x4
+ // Sample Type: Nearest
+
+ const auto& engine = get_test_engine();
+
+ int b = 1;
+ int f = 1;
+ int y = 2;
+ int x = 2;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 4;
+ x = 4;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::bilinear;
+ coordinate_transformation_mode ctm = coordinate_transformation_mode::asymmetric;
+ resample::AxesAndScales axesAndScales;
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::sizes;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm));
+
+ set_values(input, {
+ 1.f, 2.f,
+ 3.f, 4.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 1.f, 1.33333f, 1.66667f, 2.f,
+ 1.66667f, 2.f, 2.33333f, 2.66667f,
+ 2.33333f, 2.66667f, 3.f, 3.33333f,
+ 3.f, 3.33333f, 3.66667f, 4.f,
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
+
+TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) {
+ // Input : 1x1x2x4
+ // Output : 1x1x1x2
+ // Sample Type: Linear
+
+ const auto& engine = get_test_engine();
+
+ int b = 1;
+ int f = 1;
+ int y = 2;
+ int x = 4;
+ tensor shape = tensor{batch(b), feature(f), spatial(x, y)};
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape });
+
+ y = 1;
+ x = 2;
+ auto output_size = tensor(batch(b), feature(f), spatial(x, y));
+
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ int32_t antialias = 0;
+ float cube_coeff = -0.75f;
+ resample_type mode = resample_type::caffe_bilinear;
+ resample::AxesAndScales axesAndScales = {
+ {cldnn::resample::resample_axis::along_y, 0.6f},
+ {cldnn::resample::resample_axis::along_x, 0.6f},
+ };
+ shape_calculation_mode shapeCalcMode = shape_calculation_mode::scales;
+ topology.add(resample("interpolate", "input", output_size, axesAndScales, {0, 0, 0, 0}, {0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode));
+
+ set_values(input, {
+ 1.f, 2.f, 3.f, 4.f,
+ 5.f, 6.f, 7.f, 8.f,
+ });
+
+ cldnn::network net {engine, topology };
+
+ net.set_input_data("input", input);
+
+ auto outputs = net.execute();
+
+ auto output = outputs.at("interpolate").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> answers = {
+ 2.6666665f, 4.3333331f
+ };
+
+ ASSERT_EQ(answers.size(), output_ptr.size());
+ for (size_t i = 0; i < answers.size(); ++i) {
+ EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i;
+ }
+}
}
}
}
-}
\ No newline at end of file
+}