for (auto& a : permuteLayer->GetParamAsInts("order"))
ie_order.push_back(static_cast<uint16_t>(a));
+ auto outDesc = layer->outData[0]->getTensorDesc();
+ auto outDims = outDesc.getDims();
+
+ // clDNN tensors are at least 4D, so the effective rank is padded up to 4.
+ int rank = std::max(4, static_cast<int>(outDims.size()));
+ if (ie_order.empty()) {
+ // if order size is empty - we need to set inversed axes order
+ for (int o = rank - 1; o >= 0; o--)
+ ie_order.push_back((uint16_t)o);
+ }
+
// if order size is less than 4 - fill the rest with just copy
- for (auto o = ie_order.size(); o < 4; o++)
+ for (auto o = ie_order.size(); o < static_cast<size_t>(rank); o++)
ie_order.push_back((uint16_t)o);
/*
auto inputPrimitives = GetPrevLayersPrimitives(layer);
auto tileLayer = as<InferenceEngine::GenericLayer*> (layer);
- int axis = tileLayer->GetParamAsInt("axis", 1);
- int tiles = tileLayer->GetParamAsInt("tiles");
-
- auto sz = tileLayer->input().get()->getTensorDesc().getDims().size();
-
- auto cldnnAxisFromIE = [&](int axis) {
- switch (axis) {
- case 0: return cldnn::tile::tile_axis::along_b;
- case 1: return cldnn::tile::tile_axis::along_f;
- case 2:
- if (sz > 4)
- return cldnn::tile::tile_axis::along_z;
- else
- return cldnn::tile::tile_axis::along_y;
- case 3:
- if (sz > 4)
- return cldnn::tile::tile_axis::along_y;
- else
- return cldnn::tile::tile_axis::along_x;
- case 4: return cldnn::tile::tile_axis::along_x;
- default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis);
- }
- };
-
std::string tileLayerName = layer_type_name_ID(layer);
auto tilePrim = cldnn::tile(
tileLayerName,
inputPrimitives[0],
- cldnnAxisFromIE(axis),
- tiles);
+ CldnnTensorFromIEDims(tileLayer->outData[0]->getTensorDesc().getDims()));
topology.add(tilePrim);
AddPrimitiveToProfiler(tileLayerName, layer);
step_h = static_cast<float>(img_h) / inp_dims.at(img_dims.size() - 2);
}
+ auto output_dt = DataTypeFromPrecision(layer->outData[0]->getTensorDesc().getPrecision());
+
std::vector<cldnn::primitive_id> inputPrimitives = GetPrevLayersPrimitives(layer);
// second input isn't used by value - only dimensions taken from the layer input
std::string priorBoxLayerName = layer_type_name_ID(layer);
step_h,
offset,
width,
- height);
+ height,
+ output_dt);
topology.add(priorBoxPrim);
AddPrimitiveToProfiler(priorBoxLayerName, layer);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer BatchNorm tests.
+
+#include <vector>
+
+#include "single_layer_tests/batch_norm.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<double> epsilon = {
+ 1e-6,
+ 1e-5,
+ 1e-4
+};
+// 2D..4D shapes, channel dimension at index 1.
+const std::vector<std::vector<size_t>> inputShapes = {
+ {1, 3},
+ {2, 5},
+ {1, 3, 10},
+ {1, 3, 1, 1},
+ {2, 5, 4, 4},
+};
+
+
+const auto batchNormParams = testing::Combine(
+ testing::ValuesIn(epsilon),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::ValuesIn(inputShapes),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_BatchNorm,
+ BatchNormLayerTest,
+ batchNormParams,
+ BatchNormLayerTest::getTestCaseName
+);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer Convert tests:
+// a {1, 2, 3, 4} tensor is converted between every pair of the listed precisions.
+
+#include <vector>
+
+#include "single_layer_tests/convert.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16,
+ InferenceEngine::Precision::U8,
+ InferenceEngine::Precision::I8,
+};
+
+// netPrecisions is reused for both the source and the target precision.
+INSTANTIATE_TEST_CASE_P(smoke_NoReshape, ConvertLayerTest,
+ ::testing::Combine(
+ ::testing::Values(inShape),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ ConvertLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer ConvertLike tests.
+
+#include <vector>
+
+#include "single_layer_tests/convert_like.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16,
+ InferenceEngine::Precision::U8,
+ InferenceEngine::Precision::I8,
+};
+
+// The same shape/precision lists are used for both the data input and the "like" input.
+INSTANTIATE_TEST_CASE_P(smoke_NoReshape, ConvertLikeLayerTest,
+ ::testing::Combine(
+ ::testing::Values(inShape),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(inShape),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ ConvertLikeLayerTest::getTestCaseName);
+
+} // namespace
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
- ::testing::Values(CommonTestUtils::DEVICE_CPU),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(additional_config));
const auto LogicalTestParamsNot = ::testing::Combine(
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
- ::testing::Values(CommonTestUtils::DEVICE_CPU),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(additional_config));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs, LogicalLayerTest, LogicalTestParams, LogicalLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefsNot, LogicalLayerTest, LogicalTestParamsNot, LogicalLayerTest::getTestCaseName);
-} // namespace
\ No newline at end of file
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer MatMul tests.
+
+#include <vector>
+
+#include "single_layer_tests/mat_mul.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+// { {shape, transpose_a}, {shape, transpose_b} } — inner dims (6 x 6) must match.
+const std::vector<ShapeRelatedParams> shapeRelatedParams = {
+ { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } }
+};
+
+// Second input is tested both as a constant and as a runtime parameter.
+std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
+ ngraph::helpers::InputLayerType::CONSTANT,
+ ngraph::helpers::InputLayerType::PARAMETER,
+};
+
+std::map<std::string, std::string> additional_config = {};
+
+INSTANTIATE_TEST_CASE_P(smoke_MatMul, MatMulTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(shapeRelatedParams),
+ ::testing::ValuesIn(inputPrecisions),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::ValuesIn(secondaryInputTypes),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
+ ::testing::Values(additional_config)),
+ MatMulTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer Minimum/Maximum tests.
+
+#include <vector>
+#include "single_layer_tests/minimum_maximum.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+// Pairs of input shapes; several entries exercise numpy-style broadcasting
+// (scalar-like {1} second input, and the {8,1,6,1} vs {7,1,5} case).
+const std::vector<std::vector<std::vector<size_t>>> inShapes = {
+ {{2}, {1}},
+ {{1, 1, 1, 3}, {1}},
+ {{1, 2, 4}, {1}},
+ {{1, 4, 4}, {1}},
+ {{1, 4, 4, 1}, {1}},
+ {{256, 56}, {256, 56}},
+ {{8, 1, 6, 1}, {7, 1, 5}},
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16,
+};
+
+const std::vector<ngraph::helpers::MinMaxOpType> opType = {
+ ngraph::helpers::MinMaxOpType::MINIMUM,
+ ngraph::helpers::MinMaxOpType::MAXIMUM,
+};
+
+const std::vector<ngraph::helpers::InputLayerType> inputType = {
+ ngraph::helpers::InputLayerType::CONSTANT,
+ ngraph::helpers::InputLayerType::PARAMETER,
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_maximum, MaxMinLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes),
+ ::testing::ValuesIn(opType),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::ValuesIn(inputType),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ MaxMinLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer NormalizeL2 tests.
+
+#include <vector>
+
+#include "single_layer_tests/normalize_l2.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+// {} — no reduction axes given; {1} — normalize across the channel axis.
+const std::vector<std::vector<int64_t>> axes = {
+ {},
+ {1},
+};
+const std::vector<float> eps = {1e-7f, 1e-6f, 1e-5f, 1e-4f};
+
+const std::vector<ngraph::op::EpsMode> epsMode = {
+ ngraph::op::EpsMode::ADD,
+ ngraph::op::EpsMode::MAX,
+};
+
+const auto normL2params = testing::Combine(
+ testing::ValuesIn(axes),
+ testing::ValuesIn(eps),
+ testing::ValuesIn(epsMode),
+ testing::Values(std::vector<size_t>{1, 3, 10, 5}),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+// NOTE(review): no smoke_ prefix here, unlike sibling files — confirm intent.
+INSTANTIATE_TEST_CASE_P(
+ NormalizeL2,
+ NormalizeL2LayerTest,
+ normL2params,
+ NormalizeL2LayerTest::getTestCaseName
+);
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer Pad tests:
+// CONSTANT mode with several pad values, plus EDGE/REFLECT modes, on 2D and 4D inputs.
+
+#include <vector>
+
+#include "single_layer_tests/pad.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<int64_t>> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}};
+const std::vector<std::vector<int64_t>> padsEnd2D = {{0, 0}, {1, 1}, {0, 1}, {3, 2}};
+// Fill values for CONSTANT mode only.
+const std::vector<float> argPadValue = {0.f, 1.f, 2.f, -1.f};
+
+const std::vector<ngraph::helpers::PadMode> padMode = {
+ ngraph::helpers::PadMode::EDGE,
+ ngraph::helpers::PadMode::REFLECT,
+};
+
+const auto pad2DConstparams = testing::Combine(
+ testing::ValuesIn(padsBegin2D),
+ testing::ValuesIn(padsEnd2D),
+ testing::ValuesIn(argPadValue),
+ testing::Values(ngraph::helpers::PadMode::CONSTANT),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::Values(std::vector<size_t>{13, 5}),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_Pad2DConst,
+ PadLayerTest,
+ pad2DConstparams,
+ PadLayerTest::getTestCaseName
+);
+
+// Non-CONSTANT modes ignore the pad value, so a fixed 0 is passed.
+const auto pad2Dparams = testing::Combine(
+ testing::ValuesIn(padsBegin2D),
+ testing::ValuesIn(padsEnd2D),
+ testing::Values(0),
+ testing::ValuesIn(padMode),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::Values(std::vector<size_t>{13, 5}),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_Pad2D,
+ PadLayerTest,
+ pad2Dparams,
+ PadLayerTest::getTestCaseName
+);
+
+const std::vector<std::vector<int64_t>> padsBegin4D = {{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 0, 1, 0}, {0, 3, 0, 1}};
+const std::vector<std::vector<int64_t>> padsEnd4D = {{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 0, 0, 1}, {1, 3, 2, 0}};
+
+const auto pad4DConstparams = testing::Combine(
+ testing::ValuesIn(padsBegin4D),
+ testing::ValuesIn(padsEnd4D),
+ testing::ValuesIn(argPadValue),
+ testing::Values(ngraph::helpers::PadMode::CONSTANT),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::Values(std::vector<size_t>{3, 5, 10, 11}),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_Pad4DConst,
+ PadLayerTest,
+ pad4DConstparams,
+ PadLayerTest::getTestCaseName
+);
+
+const auto pad4Dparams = testing::Combine(
+ testing::ValuesIn(padsBegin4D),
+ testing::ValuesIn(padsEnd4D),
+ testing::Values(0),
+ testing::ValuesIn(padMode),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::Values(std::vector<size_t>{3, 5, 10, 11}),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_Pad4D,
+ PadLayerTest,
+ pad4Dparams,
+ PadLayerTest::getTestCaseName
+);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer Power tests.
+
+#include <vector>
+#include "single_layer_tests/power.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+ std::vector<std::vector<std::vector<size_t>>> inShapes = {
+ {{1, 8}},
+ {{2, 16}},
+ {{3, 32}},
+ {{4, 64}},
+ {{5, 128}},
+ {{6, 256}},
+ {{7, 512}},
+ {{8, 1024}}
+ };
+
+ // Exponents, including the 0.0/0.5/1.0 special cases.
+ std::vector<std::vector<float >> Power = {
+ {0.0f},
+ {0.5f},
+ {1.0f},
+ {1.1f},
+ {1.5f},
+ {2.0f},
+ };
+
+ std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16,
+ };
+
+ INSTANTIATE_TEST_CASE_P(smoke_power, PowerLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
+ ::testing::ValuesIn(Power)),
+ PowerLayerTest::getTestCaseName);
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer RegionYolo tests for three
+// model flavors: YOLOv3, a YOLOv3-mxnet variant, and Caffe YOLOv2.
+// NOTE(review): these declarations are at global scope (no anonymous
+// namespace), unlike sibling files — confirm this is intentional.
+
+#include <vector>
+
+#include "single_layer_tests/region_yolo.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+const std::vector<ngraph::Shape> inShapes_caffe = {
+ {1, 125, 13, 13}
+};
+
+const std::vector<ngraph::Shape> inShapes_mxnet = {
+ {1, 75, 52, 52},
+ {1, 75, 32, 32},
+ {1, 75, 26, 26},
+ {1, 75, 16, 16},
+ {1, 75, 13, 13},
+ {1, 75, 8, 8}
+};
+
+const std::vector<ngraph::Shape> inShapes_v3 = {
+ {1, 255, 52, 52},
+ {1, 255, 26, 26},
+ {1, 255, 13, 13}
+};
+
+const std::vector<std::vector<int64_t>> masks = {
+ {0, 1, 2},
+ {3, 4, 5},
+ {6, 7, 8}
+};
+
+const std::vector<bool> do_softmax = {true, false};
+const std::vector<size_t> classes = {80, 20};
+const std::vector<size_t> num_regions = {5, 9};
+const size_t coords = 4;
+const int start_axis = 1;
+const int end_axis = 3;
+
+// YOLOv3: 80 classes, 9 regions, softmax disabled (do_softmax[1] == false).
+const auto testCase_yolov3 = ::testing::Combine(
+ ::testing::ValuesIn(inShapes_v3),
+ ::testing::Values(classes[0]),
+ ::testing::Values(coords),
+ ::testing::Values(num_regions[1]),
+ ::testing::Values(do_softmax[1]),
+ ::testing::Values(masks[2]),
+ ::testing::Values(start_axis),
+ ::testing::Values(end_axis),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+// mxnet variant: 20 classes, softmax disabled, middle anchor mask.
+const auto testCase_yolov3_mxnet = ::testing::Combine(
+ ::testing::ValuesIn(inShapes_mxnet),
+ ::testing::Values(classes[1]),
+ ::testing::Values(coords),
+ ::testing::Values(num_regions[1]),
+ ::testing::Values(do_softmax[1]),
+ ::testing::Values(masks[1]),
+ ::testing::Values(start_axis),
+ ::testing::Values(end_axis),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+// Caffe YOLOv2: 20 classes, 5 regions, softmax enabled (do_softmax[0] == true).
+const auto testCase_yolov2_caffe = ::testing::Combine(
+ ::testing::ValuesIn(inShapes_caffe),
+ ::testing::Values(classes[1]),
+ ::testing::Values(coords),
+ ::testing::Values(num_regions[0]),
+ ::testing::Values(do_softmax[0]),
+ ::testing::Values(masks[0]),
+ ::testing::Values(start_axis),
+ ::testing::Values(end_axis),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_TestsRegionYolov3, RegionYoloLayerTest, testCase_yolov3, RegionYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsRegionYoloMxnet, RegionYoloLayerTest, testCase_yolov3_mxnet, RegionYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsRegionYoloCaffe, RegionYoloLayerTest, testCase_yolov2_caffe, RegionYoloLayerTest::getTestCaseName);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer ReorgYolo tests.
+// NOTE(review): declarations are at global scope (no anonymous namespace),
+// unlike sibling files — confirm this is intentional.
+
+#include <vector>
+
+#include "single_layer_tests/reorg_yolo.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+const std::vector<ngraph::Shape> inShapes_caffe_yolov2 = {
+ {1, 64, 26, 26},
+};
+
+const std::vector<ngraph::Shape> inShapes = {
+ {1, 4, 4, 4},
+ {1, 8, 4, 4},
+ {1, 9, 3, 3},
+ {1, 24, 34, 62},
+ {2, 8, 4, 4},
+};
+
+const std::vector<size_t> strides = {
+ 2, 3
+};
+
+const auto testCase_caffe_yolov2 = ::testing::Combine(
+ ::testing::ValuesIn(inShapes_caffe_yolov2),
+ ::testing::Values(strides[0]),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_smallest = ::testing::Combine(
+ ::testing::Values(inShapes[0]),
+ ::testing::Values(strides[0]),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_stride_2 = ::testing::Combine(
+ ::testing::Values(inShapes[1]),
+ ::testing::Values(strides[0]),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_stride_3 = ::testing::Combine(
+ ::testing::Values(inShapes[2]),
+ ::testing::Values(strides[1]),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+// NOTE(review): "smaller_h" uses inShapes[4] ({2,8,4,4}) and "batch_2" uses
+// inShapes[3] ({1,24,34,62}) — the names and indices look swapped; verify.
+const auto testCase_smaller_h = ::testing::Combine(
+ ::testing::Values(inShapes[4]),
+ ::testing::Values(strides[0]),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_batch_2 = ::testing::Combine(
+ ::testing::Values(inShapes[3]),
+ ::testing::Values(strides[0]),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_caffe_YoloV2, ReorgYoloLayerTest, testCase_caffe_yolov2, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_stride_2_smallest, ReorgYoloLayerTest, testCase_smallest, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_stride_2, ReorgYoloLayerTest, testCase_stride_2, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_stride_3, ReorgYoloLayerTest, testCase_stride_3, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_smaller_h, ReorgYoloLayerTest, testCase_smaller_h, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_batch_2, ReorgYoloLayerTest, testCase_batch_2, ReorgYoloLayerTest::getTestCaseName);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer SoftMax tests (2D and 4D inputs).
+
+#include <vector>
+
+#include "single_layer_tests/softmax.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+};
+
+const std::vector<InferenceEngine::Layout> inputLayouts2D = {
+ InferenceEngine::Layout::NC,
+};
+
+const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
+ InferenceEngine::SizeVector {1, 100},
+ InferenceEngine::SizeVector {100, 1},
+ InferenceEngine::SizeVector {10, 10},
+};
+
+const std::vector<size_t> axis2D = {
+ 0, 1
+};
+
+const auto params2D = testing::Combine(
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::ValuesIn(inputLayouts2D),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::ValuesIn(inputShapes2D),
+ testing::ValuesIn(axis2D),
+ testing::Values(CommonTestUtils::DEVICE_GPU),
+ testing::Values(std::map<std::string, std::string>())
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_SoftMax2D,
+ SoftMaxLayerTest,
+ params2D,
+ SoftMaxLayerTest::getTestCaseName
+);
+
+const std::vector<InferenceEngine::SizeVector> inputShapes4D = {
+ InferenceEngine::SizeVector {1, 100, 1, 1},
+ InferenceEngine::SizeVector {1, 3, 4, 3},
+ InferenceEngine::SizeVector {2, 3, 4, 5},
+};
+
+// Softmax over each of the four NCHW axes.
+const std::vector<size_t> axis4D = {0, 1, 2, 3};
+
+const auto params4D = testing::Combine(
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::NCHW),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::ValuesIn(inputShapes4D),
+ testing::ValuesIn(axis4D),
+ testing::Values(CommonTestUtils::DEVICE_GPU),
+ testing::Values(std::map<std::string, std::string>())
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_SoftMax4D,
+ SoftMaxLayerTest,
+ params4D,
+ SoftMaxLayerTest::getTestCaseName
+);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer SpaceToDepth tests
+// for block sizes 2 and 3, on 4D and 5D inputs.
+
+#include <vector>
+#include <ngraph/opsets/opset3.hpp>
+
+#include "single_layer_tests/space_to_depth.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace ngraph::opset3;
+
+namespace {
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::U8,
+ InferenceEngine::Precision::I16,
+};
+
+const std::vector<SpaceToDepth::SpaceToDepthMode> modes = {
+ SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+ SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST};
+
+// Spatial dims must be divisible by the block size (2 here).
+const std::vector<std::vector<size_t >> inputShapesBS2 = {
+ {1, 1, 2, 2}, {1, 1, 4, 4}, {1, 1, 6, 6}, {2, 8, 6, 6}, {2, 4, 10, 8},
+ {1, 1, 2, 2, 2}, {1, 1, 4, 4, 4}, {1, 1, 6, 6, 6}, {2, 8, 6, 6, 6}, {2, 4, 10, 8, 12}};
+
+const auto SpaceToDepthBS2 = ::testing::Combine(
+ ::testing::ValuesIn(inputShapesBS2),
+ ::testing::ValuesIn(inputPrecisions),
+ ::testing::ValuesIn(modes),
+ ::testing::Values(2),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(SpaceToDepthBS2, SpaceToDepthLayerTest, SpaceToDepthBS2, SpaceToDepthLayerTest::getTestCaseName);
+
+// Spatial dims divisible by block size 3.
+const std::vector<std::vector<size_t >> inputShapesBS3 = {
+ {1, 1, 3, 3}, {1, 1, 6, 6}, {1, 1, 9, 9}, {2, 4, 9, 9}, {2, 3, 15, 12},
+ {1, 1, 3, 3, 3}, {1, 1, 6, 6, 6}, {1, 1, 9, 9, 9}, {2, 4, 9, 9, 9}, {2, 3, 15, 12, 18}};
+
+const auto SpaceToDepthBS3 = ::testing::Combine(
+ ::testing::ValuesIn(inputShapesBS3),
+ ::testing::ValuesIn(inputPrecisions),
+ ::testing::ValuesIn(modes),
+ ::testing::Values(3),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(SpaceToDepthBS3, SpaceToDepthLayerTest, SpaceToDepthBS3, SpaceToDepthLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer Tile tests (3D and 6D cases).
+
+#include <vector>
+
+#include "single_layer_tests/tile.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32
+};
+
+// Per-axis repeat counts matching the 3D input {2, 3, 4} below.
+const std::vector<std::vector<size_t>> repeats = {
+ {1, 2, 3},
+ {2, 1, 1},
+ {2, 3, 1},
+ {2, 2, 2},
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_Tile, TileLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(repeats),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(std::vector<size_t>({2, 3, 4})),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ TileLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_Tile6d, TileLayerTest,
+ ::testing::Combine(
+ ::testing::Values(std::vector<size_t>({1, 1, 1, 2, 1, 2})),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(std::vector<size_t>({1, 4, 3, 1, 3, 1})),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ TileLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer TopK tests on a {10, 10, 10} input.
+
+#include <vector>
+
+#include "single_layer_tests/topk.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<int64_t> axes = {
+ 0,
+ 1,
+ 2,
+};
+
+// k up to the full axis length (10).
+const std::vector<int64_t> k = {
+ 1,
+ 5,
+ 10,
+};
+
+const std::vector<ngraph::opset4::TopK::Mode> modes = {
+ ngraph::opset4::TopK::Mode::MIN,
+ ngraph::opset4::TopK::Mode::MAX
+};
+
+const std::vector<ngraph::opset4::TopK::SortType> sortTypes = {
+ ngraph::opset4::TopK::SortType::NONE,
+ ngraph::opset4::TopK::SortType::SORT_INDICES,
+ ngraph::opset4::TopK::SortType::SORT_VALUES,
+};
+
+
+INSTANTIATE_TEST_CASE_P(smoke_TopK, TopKLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(k),
+ ::testing::ValuesIn(axes),
+ ::testing::ValuesIn(modes),
+ ::testing::ValuesIn(sortTypes),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ ::testing::Values(InferenceEngine::Layout::ANY),
+ ::testing::Values(std::vector<size_t>({10, 10, 10})),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ TopKLayerTest::getTestCaseName);
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared single-layer Transpose tests.
+
+#include <vector>
+
+#include "single_layer_tests/transpose.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {
+ std::vector<size_t>{1, 3, 100, 100},
+};
+
+// An explicit permutation, plus the empty order (default/inverse axes).
+const std::vector<std::vector<size_t>> inputOrder = {
+ std::vector<size_t>{0, 3, 2, 1},
+ std::vector<size_t>{},
+};
+
+const auto params = testing::Combine(
+ testing::ValuesIn(inputOrder),
+ testing::ValuesIn(netPrecisions),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::Values(InferenceEngine::Layout::ANY),
+ testing::ValuesIn(inputShapes),
+ testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+ smoke_Transpose,
+ TransposeLayerTest,
+ params,
+ TransposeLayerTest::getTestCaseName
+);
+
+} // namespace
R"(.*EltwiseLayerTest.*IS=\(.*\..*\..*\..*\..*\).*eltwiseOpType=Pow.*secondaryInputType=CONSTANT.*)",
// TODO: Issue: 40958
R"(.*(ConstantResultSubgraphTest).*)",
+
+ // TODO: Issue: 41467 -- "unsupported element type f16 op Convert"
+ R"(.*(ConvertLayerTest).*targetPRC=FP16.*)",
+ // TODO: Issue: 41466 -- "Unsupported op 'ConvertLike'"
+ R"(.*(ConvertLikeLayerTest).*)",
+ // TODO: Issue: 41462
+ R"(.*(SoftMaxLayerTest).*axis=0.*)",
+ // TODO: Issue: 41461
+ R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
+ R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
};
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// GPU instantiation of the shared Multiply+Add subgraph tests on 2D..6D shapes.
+
+#include <vector>
+
+#include "subgraph_tests/multiply_add.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {
+ {1, 3},
+ {1, 3, 2},
+ {1, 3, 2, 5},
+ {1, 3, 2, 5, 4},
+ {1, 3, 2, 2, 4, 5},
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_MultipleAdd_Nd, MultiplyAddLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inputShapes),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ MultiplyAddLayerTest::getTestCaseName);
+
+} // namespace
auto constant_2 = ngraph::builder::makeConstant<float>(ngPrc, { 1, inputShape[0], outputSize },
generateFloatNumbers(0, 1, inputShape[0] * outputSize), false);
- auto add_0 = std::make_shared<ngraph::op::Add>(unsqueeze_0, constant_2);
+ auto add_0 = std::make_shared<ngraph::op::v1::Add>(unsqueeze_0, constant_2);
auto constant_3 = std::make_shared<ngraph::op::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 1 }, std::vector<size_t>{0});
auto squeeze_0 = std::make_shared<ngraph::op::Squeeze>(add_0, constant_3);
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
const float offset,
const std::vector<float>& widths,
const std::vector<float>& heights,
+ data_types output_dt,
const padding& output_padding = padding())
- : primitive_base(id, {input}, output_padding),
+ : primitive_base(id, {input}, output_padding, optional_data_type{output_dt}),
img_size(img_size),
flip(false),
clip(clip),
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
/// @brief Constructs tile primitive.
/// @param id This primitive id.
- /// @param axis Tiling axis
- /// @param tiles Tiles number across an axis
+ /// @param out_shape The shape of tiled tensor.
tile(const primitive_id& id,
const primitive_id& input,
- const tile_axis axis,
- const int tiles,
+ const tensor out_shape,
const padding& output_padding = padding())
- : primitive_base(id, {input}, output_padding), axis(axis), tiles(tiles) {}
+ : primitive_base(id, {input}, output_padding), out_shape(out_shape) {}
- /// @brief Tiling axis
- tile_axis axis;
- /// @brief Tiles number across an axis
- int tiles;
+ /// @brief Shape of the output tensor
+ tensor out_shape;
};
/// @}
/// @}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// TileAxis
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-enum class TileAxis {
- X,
- Y,
- Z,
- FEATURE,
- BATCH,
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// DepthToSpaceMode
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
enum class DepthToSpaceMode {
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::UINT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::UINT32);
k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
#include <string>
namespace kernel_selector {
-static int32_t GetTileChannelIndex(const tile_params& params) {
- Tensor::DataChannelName name = Tensor::DataChannelName::X;
- switch (params.axis) {
- case TileAxis::X:
- name = Tensor::DataChannelName::X;
- break;
- case TileAxis::Y:
- name = Tensor::DataChannelName::Y;
- break;
- case TileAxis::Z:
- name = Tensor::DataChannelName::Z;
- break;
- case TileAxis::FEATURE:
- name = Tensor::DataChannelName::FEATURE;
- break;
- case TileAxis::BATCH:
- name = Tensor::DataChannelName::BATCH;
- break;
- default:
- break;
- }
-
- return DataTensor::Channelndex(params.output.GetLayout(), name);
-}
ParamsKey TileKernelRef::GetSupportedKey() const {
ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfzyx);
+ k.EnableInputLayout(DataLayout::bfwzyx);
+ k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const {
CommonDispatchData dispatchData;
- auto in = params.inputs[0];
-
- size_t inner_size = 1;
- size_t outer_size = 1;
-
- const int32_t axis = GetTileChannelIndex(params);
-
- for (int32_t i = 0; i <= axis; i++) {
- inner_size *= in.GetDims()[i].v;
- }
-
- for (int32_t i = axis + 1; i < static_cast<int32_t>(in.GetDims().size()); i++) {
- outer_size *= in.GetDims()[i].v;
- }
-
- if (inner_size > 1) {
- dispatchData.gws[0] = outer_size;
- dispatchData.gws[1] = inner_size;
- dispatchData.gws[2] = 1;
-
- dispatchData.lws[0] = 1;
- dispatchData.lws[1] = 1;
- dispatchData.lws[2] = 1;
- } else {
- dispatchData.gws[0] = Align(outer_size, 16);
- dispatchData.gws[1] = 1;
- dispatchData.gws[2] = 1;
+ auto out = params.output;
- dispatchData.lws[0] = 16;
- dispatchData.lws[1] = 1;
- dispatchData.lws[2] = 1;
- }
+ dispatchData.gws = {out.X().v * out.Y().v, out.Z().v * out.W().v, out.Batch().v * out.Feature().v};
+ dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
return dispatchData;
}
JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
-
- auto in = params.inputs[0];
- auto out = params.output;
-
- size_t inner_size = 1;
- size_t outer_size = 1;
- size_t axis_pitch = 1;
-
- const int32_t axis = GetTileChannelIndex(params);
-
- for (int32_t i = 0; i <= axis; i++) {
- inner_size *= in.GetDims()[i].v;
- axis_pitch *= in.GetDims()[i].LogicalDimPadded();
- }
- for (int32_t i = axis + 1; i < static_cast<int32_t>(in.GetDims().size()); i++) {
- outer_size *= in.GetDims()[i].v;
- }
-
- jit.AddConstant(MakeJitConstant("TILES", params.tiles));
- jit.AddConstant(MakeJitConstant("AXIS_PITCH", axis_pitch));
- jit.AddConstant(MakeJitConstant("OUTER_SIZE", outer_size));
- if (inner_size == 1) {
- jit.AddConstant(MakeJitConstant("OUTPUT_ELEMENTS", out.LogicalSize()));
- jit.AddConstant(MakeJitConstant("DENSE", 1));
- }
return jit;
}
// tile_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct tile_params : public base_params {
- tile_params() : base_params(KernelType::TILE), axis(TileAxis::BATCH), tiles(0) {}
-
- TileAxis axis;
- int tiles;
+ tile_params() : base_params(KernelType::TILE) {}
virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
};
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
#include "include/data_types.cl"
-KERNEL (reshape_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL (reshape_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
const uint d1 = get_global_id(0);
const uint d2 = get_global_id(1);
const uint od4 = linear % OUTPUT_SIZES[3]; linear /= OUTPUT_SIZES[3];
const uint od5 = linear % OUTPUT_SIZES[4]; linear /= OUTPUT_SIZES[4];
const uint od6 = linear % OUTPUT_SIZES[5]; linear /= OUTPUT_SIZES[5];
-
+
uint input_offset = INPUT0_OFFSET +
d1*INPUT0_PITCHES[0] +
d2*INPUT0_PITCHES[1] +
od4*OUTPUT_PITCHES[3] +
od5*OUTPUT_PITCHES[4] +
od6*OUTPUT_PITCHES[5];
-
- output[output_offset] = ACTIVATION(input[input_offset], ACTIVATION_PARAMS);
+
+ output[output_offset] = ACTIVATION(TO_OUTPUT_TYPE(input[input_offset]), ACTIVATION_PARAMS);
}
#include "include/include_all.cl"
-KERNEL(strided_slice_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint feature = get_global_id(1);
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
#include "include/common.cl"
#include "include/data_types.cl"
+#include "include/fetch.cl"
-#if DENSE
-__attribute__((intel_reqd_sub_group_size(16)))
-__attribute__((reqd_work_group_size(16, 1, 1)))
-#endif
-KERNEL (tile_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL(tile_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
-#if DENSE
+ const uint x = (uint)get_global_id(0) % OUTPUT_SIZE_X;
+ const uint y = (uint)get_global_id(0) / OUTPUT_SIZE_X;
+ const uint f = (uint)get_global_id(2) / OUTPUT_BATCH_NUM;
+ const uint b = (uint)get_global_id(2) % OUTPUT_BATCH_NUM;
+ #if OUTPUT_DIMS == 6
+ const uint z = (uint)get_global_id(1) % OUTPUT_SIZE_Z;
+ const uint w = (uint)get_global_id(1) / OUTPUT_SIZE_Z;
+ const uint out_offset = OUTPUT_GET_INDEX(b, f, w, z, y, x);
+ const uint in_offset = INPUT0_GET_INDEX_SAFE(b, f, w, z, y, x);
+ #elif OUTPUT_DIMS == 5
+ const uint z = (uint)get_global_id(1);
+ const uint out_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+ const uint in_offset = INPUT0_GET_INDEX_SAFE(b, f, z, y, x);
+ #elif OUTPUT_DIMS == 4
+ const uint out_offset = OUTPUT_GET_INDEX(b, f, y, x);
+ const uint in_offset = INPUT0_GET_INDEX_SAFE(b, f, y, x);
+ #endif
- const uint id = get_global_id(0);
- const uint group_id = id / 16;
- const uint lid = get_local_id(0);
- const uint idx = min((uint)(id), (uint)(OUTER_SIZE - 1));
- UNIT_TYPE val = input[idx];
-
- for (int t = 0; t < TILES; t++)
- {
- UNIT_TYPE save_val = intel_sub_group_shuffle(val, (t*16 + lid)/TILES);
- int offset = group_id*16*TILES + t*16 + lid;
- if (offset < OUTPUT_ELEMENTS)
- output[offset] = save_val;
- }
-#else
- const uint outer_idx = get_global_id(0);
- const uint inner_idx = get_global_id(1);
- if (inner_idx >= AXIS_PITCH) return;
-
- for (int t = 0; t < TILES; t++)
- {
- output[outer_idx*TILES*AXIS_PITCH + t*AXIS_PITCH + inner_idx] = input[outer_idx*AXIS_PITCH + inner_idx];
- }
-#endif
+ output[out_offset] = input[in_offset];
}
if (conf.empty())
return jit;
- for (auto& c : conf) {
- std::string fused_ops;
- std::string fused_ops_preload;
- std::string fused_ops_calc;
- std::string in_name = c.input_var_name;
- Datatype in_type = c.input_dt;
- bool can_all_use_preload = true;
-
- for (size_t i = 0; i < params.fused_ops.size(); i++) {
- auto fused_dep_codegen = FusedOpsCodeGenerator(params.fused_ops[i]);
- std::string out_var;
- Datatype out_type;
- jit.Merge(fused_dep_codegen.MakeLoadJitConstants(c, params.output));
- jit.Merge(fused_dep_codegen.MakeOpJitConstants(c, in_name, in_type, out_var, out_type));
- in_name = out_var;
- in_type = out_type;
-
- bool can_use_preload = fused_dep_codegen.CanPreloadData(c);
- can_all_use_preload &= can_use_preload;
- bool can_preload_eltwise = true;
- if (params.fused_ops[i].GetType() == FusedOpType::ELTWISE &&
- c.load_type == FusedOpsConfiguration::LoadType::FEATURE_SHUFFLE)
- can_preload_eltwise = false;
- fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
- fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
- if (can_use_preload && can_preload_eltwise)
- fused_ops_preload += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
- if (c.allow_for_partial_preload && (!can_use_preload || !can_preload_eltwise))
- fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
- fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+ try {
+ for (auto& c : conf) {
+ std::string fused_ops;
+ std::string fused_ops_preload;
+ std::string fused_ops_calc;
+ std::string in_name = c.input_var_name;
+ Datatype in_type = c.input_dt;
+ bool can_all_use_preload = true;
+
+ for (size_t i = 0; i < params.fused_ops.size(); i++) {
+ auto fused_dep_codegen = FusedOpsCodeGenerator(params.fused_ops[i]);
+ std::string out_var;
+ Datatype out_type;
+ jit.Merge(fused_dep_codegen.MakeLoadJitConstants(c, params.output));
+ jit.Merge(fused_dep_codegen.MakeOpJitConstants(c, in_name, in_type, out_var, out_type));
+ in_name = out_var;
+ in_type = out_type;
+
+ bool can_use_preload = fused_dep_codegen.CanPreloadData(c);
+ can_all_use_preload &= can_use_preload;
+ bool can_preload_eltwise = true;
+ if (params.fused_ops[i].GetType() == FusedOpType::ELTWISE &&
+ c.load_type == FusedOpsConfiguration::LoadType::FEATURE_SHUFFLE)
+ can_preload_eltwise = false;
+ fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+ fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+ if (can_use_preload && can_preload_eltwise)
+ fused_ops_preload += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+ if (c.allow_for_partial_preload && (!can_use_preload || !can_preload_eltwise))
+ fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+ fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+ }
+
+ jit.AddConstant(MakeJitConstant("FUSED_OPS" + c.suffix, fused_ops));
+ jit.AddConstant(MakeJitConstant("FUSED_OPS_PRELOAD" + c.suffix, fused_ops_preload));
+ jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc));
+ jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, in_name));
+
+ bool can_any_use_preload = !fused_ops_preload.empty();
+ jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix,
+ can_all_use_preload || (c.allow_for_partial_preload && can_any_use_preload)));
}
- jit.AddConstant(MakeJitConstant("FUSED_OPS" + c.suffix, fused_ops));
- jit.AddConstant(MakeJitConstant("FUSED_OPS_PRELOAD" + c.suffix, fused_ops_preload));
- jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc));
- jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, in_name));
-
- bool can_any_use_preload = !fused_ops_preload.empty();
- jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix,
- can_all_use_preload || (c.allow_for_partial_preload && can_any_use_preload)));
+ jit.Merge(MakeFusedOpsDeclsJitConstants(params, conf));
+ } catch (std::exception& ex) {
+ throw std::runtime_error("Fused op code generation for node " + params.layerID + " failed with error: " + ex.what());
}
- jit.Merge(MakeFusedOpsDeclsJitConstants(params, conf));
-
return jit;
}
}
}
-std::string toString(TileAxis a) {
- switch (a) {
- case TileAxis::X: return "X";
- case TileAxis::Y: return "Y";
- case TileAxis::FEATURE: return "FEATURE";
- case TileAxis::BATCH: return "BATCH";
- default: return "";
- }
-}
-
std::string toString(GatherAxis a) {
switch (a) {
case GatherAxis::X: return "X";
std::string toString(MVNMode mode);
std::string toString(WeightsLayout layout);
std::string toString(ConcatAxis a);
-std::string toString(TileAxis a);
std::string toString(GatherAxis a);
std::string toString(ScatterUpdateAxis a);
std::string toString(ResampleType type);
deconv_info.add("stride", strd.to_string());
deconv_info.add("input offset", desc->input_offset.to_string());
deconv_info.add("split", split);
+ deconv_info.add("groups", desc->groups);
if (desc->with_output_size) {
json_composite ud_out_size_info;
ud_out_size_info.add("size", desc->output_size.to_string());
attach_strided_slice_gpu::attach_strided_slice_gpu() {
auto val_fw = strided_slice_gpu::create;
- implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
- val_fw);
- implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
- val_fw);
- implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
- val_fw);
- implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
- val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
+ implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
}
} // namespace detail
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
namespace cldnn {
namespace gpu {
-kernel_selector::tile_axis convert_axis(tile::tile_axis axis) {
- switch (axis) {
- case tile::along_x:
- return kernel_selector::tile_axis::X;
- case tile::along_y:
- return kernel_selector::tile_axis::Y;
- case tile::along_z:
- return kernel_selector::tile_axis::Z;
- case tile::along_f:
- return kernel_selector::tile_axis::FEATURE;
- case tile::along_b:
- return kernel_selector::tile_axis::BATCH;
- default:
- return kernel_selector::tile_axis::X;
- }
-}
-
struct tile_gpu : typed_primitive_gpu_impl<tile> {
using parent = typed_primitive_gpu_impl<tile>;
using parent::parent;
auto tile_optional_params =
get_default_optional_params<kernel_selector::tile_optional_params>(arg.get_program());
- tile_params.axis = convert_axis(arg.get_primitive()->axis);
- tile_params.tiles = arg.get_primitive()->tiles;
-
auto& kernel_selector = kernel_selector::tile_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(tile_params, tile_optional_params);
attach_tile_gpu::attach_tile_gpu() {
auto val_fw = tile_gpu::create;
- implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
- implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+ implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
}
} // namespace detail
using mean_subtruct_mode = kernel_selector::MeanSubtractMode;
using mean_op = kernel_selector::MeanOp;
using concat_axis = kernel_selector::ConcatAxis;
-using tile_axis = kernel_selector::TileAxis;
using tuning_mode = kernel_selector::TuningMode;
using sample_type = kernel_selector::ResampleType;
using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode;
/*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
primitive_inst = find_primitive(id);
if (primitive_inst == nullptr)
- throw std::runtime_error("topology doesn't contain prmitive:" + id);
+ throw std::runtime_error("topology doesn't contain primitive:" + id);
if (primitive_inst->type() != input_layout::type_id()) {
CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input");
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
result = get_program().get_engine().allocate_memory(get_output_layout(), 0, false);
// perform calculations
- if (input().get_output_layout().data_type == data_types::f16)
+ if (get_output_layout().data_type == data_types::f16)
calculate_prior_box_output<data_type_to_type<data_types::f16>::type>(*result,
input().get_output_layout(),
*typed_desc());
}
layout prior_box_inst::calc_output_layout(prior_box_node const& node) {
- assert(static_cast<bool>(node.get_primitive()->output_data_type) == false &&
- "Output data type forcing is not supported for prior_box_node!");
auto desc = node.get_primitive();
auto input_layout = node.input().get_output_layout();
// Second feature stores the variance of each prior coordinate.
auto output_data_type = input_layout.data_type == data_types::f16 ? data_types::f16 : data_types::f32;
+ if (node.get_primitive()->output_data_type)
+ output_data_type = *node.get_primitive()->output_data_type;
return {output_data_type, cldnn::format::bfyx, cldnn::tensor(1, 2, 1, layer_width * layer_height * num_priors * 4)};
}
/*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
std::unique_ptr<json_composite> node_info = std::unique_ptr<json_composite>(new json_composite());
node_info->add("ptr", "node_" + std::to_string(reinterpret_cast<uintptr_t>(this)));
node_info->add("id", id());
- node_info->add("type", get_extr_type(typeid(*this).name()));
+ node_info->add("type", desc->type_string());
node_info->add("internal", bool_to_str(this->is_type<internal_primitive>()));
node_info->add("valid output layout", bool_to_str(valid_output_layout));
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
auto input_layout = node.input().get_output_layout();
auto input_format = input_layout.format;
- auto result_sizes = input_layout.size.sizes();
-
- auto axis_index = node.get_primitive()->axis;
- auto tiles = node.get_primitive()->tiles;
-
- // calculate sum of features from all inputs
- result_sizes[axis_index] *= tiles;
- return layout{input_layout.data_type, input_format, (tensor) result_sizes};
+ return layout{input_layout.data_type, input_format, desc->out_shape};
}
std::string tile_inst::to_string(tile_node const& node) {
json_composite tile_info;
tile_info.add("input id", input.id());
- tile_info.add("axis", desc->axis);
- tile_info.add("tiles", desc->tiles);
-
node_info->add("tile info", tile_info);
node_info->dump(primitive_description);
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
- topology.add(tile("tile", "reshape", tile::along_y, 4));
+ topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4)));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
set_values(input, input_vec);
/*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(tile("tile", "input", tile::along_b, 2));
+ topology.add(tile("tile", "input", tensor(2, 2, 2, 2)));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
2.f, 0.f, 6.f, 5.2f };
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(tile("tile", "input", tile::along_f, 2));
+ topology.add(tile("tile", "input", tensor(1, 4, 2, 2)));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
- auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
+ auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(tile("tile", "input", tile::along_y, 2));
+ topology.add(tile("tile", "input", tensor(1, 2, 2, 4)));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
- auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
+ auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(tile("tile", "input", tile::along_x, 2));
+ topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 1 } });
- auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 2 } });
+ auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(tile("tile", "input", tile::along_x, 4));
+ topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f};
set_values(input, input_vec);
topology topology;
topology.add(input_layout("input", input.get_layout()));
- topology.add(tile("tile", "input", tile::along_z, 2));
+ topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4)));
std::vector<float> input_vec = {
1.f, 0.f,
EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i;
}
}
-