[IE CLDNN] Enabled more functional tests and added several fixes into ops implementations
author: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Sat, 24 Oct 2020 20:38:13 +0000 (23:38 +0300)
committer: GitHub <noreply@github.com>
Sat, 24 Oct 2020 20:38:13 +0000 (23:38 +0300)
44 files changed:
inference-engine/src/cldnn_engine/cldnn_program.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/batch_norm.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_like.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/gather_tree.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/logical.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/minimum_maximum.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/normalize_l2.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pad.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/power.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/region_yolo.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reorg_yolo.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/softmax.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/space_to_depth.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/topk.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/transpose.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/multiply_add.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/src/subgraph_tests/matmul_squeeze_add.cpp
inference-engine/thirdparty/clDNN/api/prior_box.hpp
inference-engine/thirdparty/clDNN/api/tile.hpp
inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reshape_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/strided_slice_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/tile_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h
inference-engine/thirdparty/clDNN/src/deconvolution.cpp
inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp
inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h
inference-engine/thirdparty/clDNN/src/network.cpp
inference-engine/thirdparty/clDNN/src/prior_box.cpp
inference-engine/thirdparty/clDNN/src/program_node.cpp
inference-engine/thirdparty/clDNN/src/tile.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp

index 42c976a..3c88dc3 100644 (file)
@@ -1844,8 +1844,18 @@ void Program::CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine:
     for (auto& a : permuteLayer->GetParamAsInts("order"))
         ie_order.push_back(static_cast<uint16_t>(a));
 
+    auto outDesc = layer->outData[0]->getTensorDesc();
+    auto outDims = outDesc.getDims();
+
+    int rank = std::max(4, static_cast<int>(outDims.size()));
+    if (ie_order.empty()) {
+        // if order size is empty - we need to set inversed axes order
+        for (int o = rank - 1; o >= 0; o--)
+            ie_order.push_back((uint16_t)o);
+    }
+
     // if order size is less than 4 - fill the rest with just copy
-    for (auto o = ie_order.size(); o < 4; o++)
+    for (auto o = ie_order.size(); o < rank; o++)
         ie_order.push_back((uint16_t)o);
 
     /*
@@ -3910,36 +3920,11 @@ void Program::CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CN
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto tileLayer = as<InferenceEngine::GenericLayer*> (layer);
 
-    int axis = tileLayer->GetParamAsInt("axis", 1);
-    int tiles = tileLayer->GetParamAsInt("tiles");
-
-    auto sz = tileLayer->input().get()->getTensorDesc().getDims().size();
-
-    auto cldnnAxisFromIE = [&](int axis) {
-        switch (axis) {
-            case 0: return cldnn::tile::tile_axis::along_b;
-            case 1: return cldnn::tile::tile_axis::along_f;
-            case 2:
-                if (sz > 4)
-                    return cldnn::tile::tile_axis::along_z;
-                else
-                    return cldnn::tile::tile_axis::along_y;
-            case 3:
-                if (sz > 4)
-                    return cldnn::tile::tile_axis::along_y;
-                else
-                    return cldnn::tile::tile_axis::along_x;
-            case 4: return cldnn::tile::tile_axis::along_x;
-            default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis);
-        }
-    };
-
     std::string tileLayerName = layer_type_name_ID(layer);
     auto tilePrim = cldnn::tile(
         tileLayerName,
         inputPrimitives[0],
-        cldnnAxisFromIE(axis),
-        tiles);
+        CldnnTensorFromIEDims(tileLayer->outData[0]->getTensorDesc().getDims()));
 
     topology.add(tilePrim);
     AddPrimitiveToProfiler(tileLayerName, layer);
@@ -5321,6 +5306,8 @@ void Program::CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, Infere
         step_h = static_cast<float>(img_h) / inp_dims.at(img_dims.size() - 2);
     }
 
+    auto output_dt = DataTypeFromPrecision(layer->outData[0]->getTensorDesc().getPrecision());
+
     std::vector<cldnn::primitive_id> inputPrimitives = GetPrevLayersPrimitives(layer);
     // second input isn't used by value - only dimensions taken from the layer input
     std::string priorBoxLayerName = layer_type_name_ID(layer);
@@ -5334,7 +5321,8 @@ void Program::CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, Infere
         step_h,
         offset,
         width,
-        height);
+        height,
+        output_dt);
 
     topology.add(priorBoxPrim);
     AddPrimitiveToProfiler(priorBoxLayerName, layer);
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/batch_norm.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/batch_norm.cpp
new file mode 100644 (file)
index 0000000..73680a9
--- /dev/null
@@ -0,0 +1,49 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/batch_norm.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<double> epsilon = {
+    1e-6,
+    1e-5,
+    1e-4
+};
+const std::vector<std::vector<size_t>> inputShapes = {
+        {1, 3},
+        {2, 5},
+        {1, 3, 10},
+        {1, 3, 1, 1},
+        {2, 5, 4, 4},
+};
+
+
+const auto batchNormParams = testing::Combine(
+        testing::ValuesIn(epsilon),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::ValuesIn(inputShapes),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_BatchNorm,
+        BatchNormLayerTest,
+        batchNormParams,
+        BatchNormLayerTest::getTestCaseName
+);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert.cpp
new file mode 100644 (file)
index 0000000..a870276
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/convert.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::U8,
+        InferenceEngine::Precision::I8,
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_NoReshape, ConvertLayerTest,
+                        ::testing::Combine(
+                                ::testing::Values(inShape),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                        ConvertLayerTest::getTestCaseName);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_like.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_like.cpp
new file mode 100644 (file)
index 0000000..56283e8
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/convert_like.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::U8,
+        InferenceEngine::Precision::I8,
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_NoReshape, ConvertLikeLayerTest,
+                        ::testing::Combine(
+                                ::testing::Values(inShape),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(inShape),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                        ConvertLikeLayerTest::getTestCaseName);
+
+}  // namespace
index b7e2c79..f89d35f 100644 (file)
@@ -59,7 +59,7 @@ const auto LogicalTestParams = ::testing::Combine(
         ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
         ::testing::Values(InferenceEngine::Layout::ANY),
         ::testing::Values(InferenceEngine::Layout::ANY),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::Values(additional_config));
 
 const auto LogicalTestParamsNot = ::testing::Combine(
@@ -71,11 +71,11 @@ const auto LogicalTestParamsNot = ::testing::Combine(
         ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
         ::testing::Values(InferenceEngine::Layout::ANY),
         ::testing::Values(InferenceEngine::Layout::ANY),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::Values(additional_config));
 
 INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs, LogicalLayerTest, LogicalTestParams, LogicalLayerTest::getTestCaseName);
 
 INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefsNot, LogicalLayerTest, LogicalTestParamsNot, LogicalLayerTest::getTestCaseName);
 
-}  // namespace
\ No newline at end of file
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp
new file mode 100644 (file)
index 0000000..1a0b854
--- /dev/null
@@ -0,0 +1,41 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/mat_mul.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<ShapeRelatedParams> shapeRelatedParams = {
+        { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } }
+};
+
+std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
+        ngraph::helpers::InputLayerType::CONSTANT,
+        ngraph::helpers::InputLayerType::PARAMETER,
+};
+
+std::map<std::string, std::string> additional_config = {};
+
+INSTANTIATE_TEST_CASE_P(smoke_MatMul, MatMulTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(shapeRelatedParams),
+                ::testing::ValuesIn(inputPrecisions),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Layout::ANY),
+                ::testing::ValuesIn(secondaryInputTypes),
+                ::testing::Values(CommonTestUtils::DEVICE_GPU),
+                ::testing::Values(additional_config)),
+        MatMulTest::getTestCaseName);
+
+} // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/minimum_maximum.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/minimum_maximum.cpp
new file mode 100644 (file)
index 0000000..473e578
--- /dev/null
@@ -0,0 +1,51 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include "single_layer_tests/minimum_maximum.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<std::vector<std::vector<size_t>>> inShapes = {
+        {{2}, {1}},
+        {{1, 1, 1, 3}, {1}},
+        {{1, 2, 4}, {1}},
+        {{1, 4, 4}, {1}},
+        {{1, 4, 4, 1}, {1}},
+        {{256, 56}, {256, 56}},
+        {{8, 1, 6, 1}, {7, 1, 5}},
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
+};
+
+const std::vector<ngraph::helpers::MinMaxOpType> opType = {
+        ngraph::helpers::MinMaxOpType::MINIMUM,
+        ngraph::helpers::MinMaxOpType::MAXIMUM,
+};
+
+const std::vector<ngraph::helpers::InputLayerType> inputType = {
+        ngraph::helpers::InputLayerType::CONSTANT,
+        ngraph::helpers::InputLayerType::PARAMETER,
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_maximum, MaxMinLayerTest,
+                        ::testing::Combine(
+                                ::testing::ValuesIn(inShapes),
+                                ::testing::ValuesIn(opType),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::ValuesIn(inputType),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                        MaxMinLayerTest::getTestCaseName);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/normalize_l2.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/normalize_l2.cpp
new file mode 100644 (file)
index 0000000..da45461
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/normalize_l2.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<int64_t>> axes = {
+        {},
+        {1},
+};
+const std::vector<float> eps = {1e-7f, 1e-6f, 1e-5f, 1e-4f};
+
+const std::vector<ngraph::op::EpsMode> epsMode = {
+        ngraph::op::EpsMode::ADD,
+        ngraph::op::EpsMode::MAX,
+};
+
+const auto normL2params = testing::Combine(
+        testing::ValuesIn(axes),
+        testing::ValuesIn(eps),
+        testing::ValuesIn(epsMode),
+        testing::Values(std::vector<size_t>{1, 3, 10, 5}),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        NormalizeL2,
+        NormalizeL2LayerTest,
+        normL2params,
+        NormalizeL2LayerTest::getTestCaseName
+);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pad.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pad.cpp
new file mode 100644 (file)
index 0000000..379647c
--- /dev/null
@@ -0,0 +1,109 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/pad.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<int64_t>> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}};
+const std::vector<std::vector<int64_t>> padsEnd2D   = {{0, 0}, {1, 1}, {0, 1}, {3, 2}};
+const std::vector<float> argPadValue = {0.f, 1.f, 2.f, -1.f};
+
+const std::vector<ngraph::helpers::PadMode> padMode = {
+        ngraph::helpers::PadMode::EDGE,
+        ngraph::helpers::PadMode::REFLECT,
+};
+
+const auto pad2DConstparams = testing::Combine(
+        testing::ValuesIn(padsBegin2D),
+        testing::ValuesIn(padsEnd2D),
+        testing::ValuesIn(argPadValue),
+        testing::Values(ngraph::helpers::PadMode::CONSTANT),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{13, 5}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Pad2DConst,
+        PadLayerTest,
+        pad2DConstparams,
+        PadLayerTest::getTestCaseName
+);
+
+const auto pad2Dparams = testing::Combine(
+        testing::ValuesIn(padsBegin2D),
+        testing::ValuesIn(padsEnd2D),
+        testing::Values(0),
+        testing::ValuesIn(padMode),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{13, 5}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Pad2D,
+        PadLayerTest,
+        pad2Dparams,
+        PadLayerTest::getTestCaseName
+);
+
+const std::vector<std::vector<int64_t>> padsBegin4D = {{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 0, 1, 0}, {0, 3, 0, 1}};
+const std::vector<std::vector<int64_t>> padsEnd4D   = {{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 0, 0, 1}, {1, 3, 2, 0}};
+
+const auto pad4DConstparams = testing::Combine(
+        testing::ValuesIn(padsBegin4D),
+        testing::ValuesIn(padsEnd4D),
+        testing::ValuesIn(argPadValue),
+        testing::Values(ngraph::helpers::PadMode::CONSTANT),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{3, 5, 10, 11}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Pad4DConst,
+        PadLayerTest,
+        pad4DConstparams,
+        PadLayerTest::getTestCaseName
+);
+
+const auto pad4Dparams = testing::Combine(
+        testing::ValuesIn(padsBegin4D),
+        testing::ValuesIn(padsEnd4D),
+        testing::Values(0),
+        testing::ValuesIn(padMode),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{3, 5, 10, 11}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Pad4D,
+        PadLayerTest,
+        pad4Dparams,
+        PadLayerTest::getTestCaseName
+);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/power.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/power.cpp
new file mode 100644 (file)
index 0000000..8382ebf
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include "single_layer_tests/power.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+    std::vector<std::vector<std::vector<size_t>>> inShapes = {
+            {{1, 8}},
+            {{2, 16}},
+            {{3, 32}},
+            {{4, 64}},
+            {{5, 128}},
+            {{6, 256}},
+            {{7, 512}},
+            {{8, 1024}}
+    };
+
+    std::vector<std::vector<float >> Power = {
+            {0.0f},
+            {0.5f},
+            {1.0f},
+            {1.1f},
+            {1.5f},
+            {2.0f},
+    };
+
+    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+                                                             InferenceEngine::Precision::FP16,
+    };
+
+    INSTANTIATE_TEST_CASE_P(smoke_power, PowerLayerTest,
+                            ::testing::Combine(
+                                    ::testing::ValuesIn(inShapes),
+                                    ::testing::ValuesIn(netPrecisions),
+                                    ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                    ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                    ::testing::Values(InferenceEngine::Layout::ANY),
+                                    ::testing::Values(InferenceEngine::Layout::ANY),
+                                    ::testing::Values(CommonTestUtils::DEVICE_GPU),
+                                    ::testing::ValuesIn(Power)),
+                            PowerLayerTest::getTestCaseName);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/region_yolo.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/region_yolo.cpp
new file mode 100644 (file)
index 0000000..17ad626
--- /dev/null
@@ -0,0 +1,85 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/region_yolo.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+const std::vector<ngraph::Shape> inShapes_caffe = {
+    {1, 125, 13, 13}
+};
+
+const std::vector<ngraph::Shape> inShapes_mxnet = {
+    {1, 75, 52, 52},
+    {1, 75, 32, 32},
+    {1, 75, 26, 26},
+    {1, 75, 16, 16},
+    {1, 75, 13, 13},
+    {1, 75, 8, 8}
+};
+
+const std::vector<ngraph::Shape> inShapes_v3 = {
+    {1, 255, 52, 52},
+    {1, 255, 26, 26},
+    {1, 255, 13, 13}
+};
+
+const std::vector<std::vector<int64_t>> masks = {
+    {0, 1, 2},
+    {3, 4, 5},
+    {6, 7, 8}
+};
+
+const std::vector<bool> do_softmax = {true, false};
+const std::vector<size_t> classes = {80, 20};
+const std::vector<size_t> num_regions = {5, 9};
+const size_t coords = 4;
+const int start_axis = 1;
+const int end_axis = 3;
+
+const auto testCase_yolov3 = ::testing::Combine(
+    ::testing::ValuesIn(inShapes_v3),
+    ::testing::Values(classes[0]),
+    ::testing::Values(coords),
+    ::testing::Values(num_regions[1]),
+    ::testing::Values(do_softmax[1]),
+    ::testing::Values(masks[2]),
+    ::testing::Values(start_axis),
+    ::testing::Values(end_axis),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_yolov3_mxnet = ::testing::Combine(
+    ::testing::ValuesIn(inShapes_mxnet),
+    ::testing::Values(classes[1]),
+    ::testing::Values(coords),
+    ::testing::Values(num_regions[1]),
+    ::testing::Values(do_softmax[1]),
+    ::testing::Values(masks[1]),
+    ::testing::Values(start_axis),
+    ::testing::Values(end_axis),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_yolov2_caffe = ::testing::Combine(
+    ::testing::ValuesIn(inShapes_caffe),
+    ::testing::Values(classes[1]),
+    ::testing::Values(coords),
+    ::testing::Values(num_regions[0]),
+    ::testing::Values(do_softmax[0]),
+    ::testing::Values(masks[0]),
+    ::testing::Values(start_axis),
+    ::testing::Values(end_axis),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_TestsRegionYolov3, RegionYoloLayerTest, testCase_yolov3, RegionYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsRegionYoloMxnet, RegionYoloLayerTest, testCase_yolov3_mxnet, RegionYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsRegionYoloCaffe, RegionYoloLayerTest, testCase_yolov2_caffe, RegionYoloLayerTest::getTestCaseName);
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reorg_yolo.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reorg_yolo.cpp
new file mode 100644 (file)
index 0000000..14f83da
--- /dev/null
@@ -0,0 +1,75 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/reorg_yolo.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+const std::vector<ngraph::Shape> inShapes_caffe_yolov2 = {
+    {1, 64, 26, 26},
+};
+
+const std::vector<ngraph::Shape> inShapes = {
+    {1, 4, 4, 4},
+    {1, 8, 4, 4},
+    {1, 9, 3, 3},
+    {1, 24, 34, 62},
+    {2, 8, 4, 4},
+};
+
+const std::vector<size_t> strides = {
+    2, 3
+};
+
+const auto testCase_caffe_yolov2 = ::testing::Combine(
+    ::testing::ValuesIn(inShapes_caffe_yolov2),
+    ::testing::Values(strides[0]),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_smallest = ::testing::Combine(
+    ::testing::Values(inShapes[0]),
+    ::testing::Values(strides[0]),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_stride_2 = ::testing::Combine(
+    ::testing::Values(inShapes[1]),
+    ::testing::Values(strides[0]),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_stride_3 = ::testing::Combine(
+    ::testing::Values(inShapes[2]),
+    ::testing::Values(strides[1]),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_smaller_h = ::testing::Combine(
+    ::testing::Values(inShapes[3]),
+    ::testing::Values(strides[0]),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto testCase_batch_2 = ::testing::Combine(
+    ::testing::Values(inShapes[4]),
+    ::testing::Values(strides[0]),
+    ::testing::Values(InferenceEngine::Precision::FP32),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_caffe_YoloV2, ReorgYoloLayerTest, testCase_caffe_yolov2, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_stride_2_smallest, ReorgYoloLayerTest, testCase_smallest, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_stride_2, ReorgYoloLayerTest, testCase_stride_2, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_stride_3, ReorgYoloLayerTest, testCase_stride_3, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_smaller_h, ReorgYoloLayerTest, testCase_smaller_h, ReorgYoloLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_TestsReorgYolo_batch_2, ReorgYoloLayerTest, testCase_batch_2, ReorgYoloLayerTest::getTestCaseName);
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/softmax.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/softmax.cpp
new file mode 100644 (file)
index 0000000..2ce2e8e
--- /dev/null
@@ -0,0 +1,78 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/softmax.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+};
+
+const std::vector<InferenceEngine::Layout> inputLayouts2D = {
+    InferenceEngine::Layout::NC,
+};
+
+const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
+    InferenceEngine::SizeVector {1, 100},
+    InferenceEngine::SizeVector {100, 1},
+    InferenceEngine::SizeVector {10, 10},
+};
+
+const std::vector<size_t> axis2D = {
+    0, 1
+};
+
+const auto params2D = testing::Combine(
+    testing::ValuesIn(netPrecisions),
+    testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+    testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+    testing::ValuesIn(inputLayouts2D),
+    testing::Values(InferenceEngine::Layout::ANY),
+    testing::ValuesIn(inputShapes2D),
+    testing::ValuesIn(axis2D),
+    testing::Values(CommonTestUtils::DEVICE_GPU),
+    testing::Values(std::map<std::string, std::string>())
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_SoftMax2D,
+        SoftMaxLayerTest,
+        params2D,
+        SoftMaxLayerTest::getTestCaseName
+);
+
+const std::vector<InferenceEngine::SizeVector> inputShapes4D = {
+    InferenceEngine::SizeVector {1, 100, 1, 1},
+    InferenceEngine::SizeVector {1, 3, 4, 3},
+    InferenceEngine::SizeVector {2, 3, 4, 5},
+};
+
+const std::vector<size_t> axis4D = {0, 1, 2, 3};
+
+const auto params4D = testing::Combine(
+    testing::ValuesIn(netPrecisions),
+    testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+    testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+    testing::Values(InferenceEngine::Layout::NCHW),
+    testing::Values(InferenceEngine::Layout::ANY),
+    testing::ValuesIn(inputShapes4D),
+    testing::ValuesIn(axis4D),
+    testing::Values(CommonTestUtils::DEVICE_GPU),
+    testing::Values(std::map<std::string, std::string>())
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_SoftMax4D,
+        SoftMaxLayerTest,
+        params4D,
+        SoftMaxLayerTest::getTestCaseName
+);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/space_to_depth.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/space_to_depth.cpp
new file mode 100644 (file)
index 0000000..56b7137
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <ngraph/opsets/opset3.hpp>
+
+#include "single_layer_tests/space_to_depth.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace ngraph::opset3;
+
+namespace {
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::U8,
+        InferenceEngine::Precision::I16,
+};
+
+const std::vector<SpaceToDepth::SpaceToDepthMode> modes = {
+        SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+        SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST};
+
+const std::vector<std::vector<size_t>> inputShapesBS2 = {
+        {1, 1, 2, 2}, {1, 1, 4, 4}, {1, 1, 6, 6}, {2, 8, 6, 6}, {2, 4, 10, 8},
+        {1, 1, 2, 2, 2}, {1, 1, 4, 4, 4}, {1, 1, 6, 6, 6}, {2, 8, 6, 6, 6}, {2, 4, 10, 8, 12}};
+
+const auto SpaceToDepthBS2 = ::testing::Combine(
+        ::testing::ValuesIn(inputShapesBS2),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::ValuesIn(modes),
+        ::testing::Values(2),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_SpaceToDepthBS2, SpaceToDepthLayerTest, SpaceToDepthBS2, SpaceToDepthLayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> inputShapesBS3 = {
+        {1, 1, 3, 3}, {1, 1, 6, 6}, {1, 1, 9, 9}, {2, 4, 9, 9}, {2, 3, 15, 12},
+        {1, 1, 3, 3, 3}, {1, 1, 6, 6, 6}, {1, 1, 9, 9, 9}, {2, 4, 9, 9, 9}, {2, 3, 15, 12, 18}};
+
+const auto SpaceToDepthBS3 = ::testing::Combine(
+        ::testing::ValuesIn(inputShapesBS3),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::ValuesIn(modes),
+        ::testing::Values(3),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_SpaceToDepthBS3, SpaceToDepthLayerTest, SpaceToDepthBS3, SpaceToDepthLayerTest::getTestCaseName);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp
new file mode 100644 (file)
index 0000000..9dc434f
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/tile.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32
+};
+
+const std::vector<std::vector<size_t>> repeats = {
+        {1, 2, 3},
+        {2, 1, 1},
+        {2, 3, 1},
+        {2, 2, 2},
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_Tile, TileLayerTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(repeats),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Layout::ANY),
+                ::testing::Values(InferenceEngine::Layout::ANY),
+                ::testing::Values(std::vector<size_t>({2, 3, 4})),
+                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+        TileLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_Tile6d, TileLayerTest,
+        ::testing::Combine(
+                ::testing::Values(std::vector<size_t>({1, 1, 1, 2, 1, 2})),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Layout::ANY),
+                ::testing::Values(InferenceEngine::Layout::ANY),
+                ::testing::Values(std::vector<size_t>({1, 4, 3, 1, 3, 1})),
+                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+        TileLayerTest::getTestCaseName);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/topk.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/topk.cpp
new file mode 100644 (file)
index 0000000..7b31962
--- /dev/null
@@ -0,0 +1,55 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/topk.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<int64_t> axes = {
+        0,
+        1,
+        2,
+};
+
+const std::vector<int64_t> k = {
+        1,
+        5,
+        10,
+};
+
+const std::vector<ngraph::opset4::TopK::Mode> modes = {
+        ngraph::opset4::TopK::Mode::MIN,
+        ngraph::opset4::TopK::Mode::MAX
+};
+
+const std::vector<ngraph::opset4::TopK::SortType> sortTypes = {
+        ngraph::opset4::TopK::SortType::NONE,
+        ngraph::opset4::TopK::SortType::SORT_INDICES,
+        ngraph::opset4::TopK::SortType::SORT_VALUES,
+};
+
+
+INSTANTIATE_TEST_CASE_P(smoke_TopK, TopKLayerTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(k),
+                ::testing::ValuesIn(axes),
+                ::testing::ValuesIn(modes),
+                ::testing::ValuesIn(sortTypes),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                ::testing::Values(InferenceEngine::Layout::ANY),
+                ::testing::Values(std::vector<size_t>({10, 10, 10})),
+                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+        TopKLayerTest::getTestCaseName);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/transpose.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/transpose.cpp
new file mode 100644 (file)
index 0000000..0aa03eb
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/transpose.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {
+        std::vector<size_t>{1, 3, 100, 100},
+};
+
+const std::vector<std::vector<size_t>> inputOrder = {
+        std::vector<size_t>{0, 3, 2, 1},
+        std::vector<size_t>{},
+};
+
+const auto params = testing::Combine(
+        testing::ValuesIn(inputOrder),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::ValuesIn(inputShapes),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Transpose,
+        TransposeLayerTest,
+        params,
+        TransposeLayerTest::getTestCaseName
+);
+
+}  // namespace
index 9234b27..5a696f9 100644 (file)
@@ -24,5 +24,15 @@ std::vector<std::string> disabledTestPatterns() {
             R"(.*EltwiseLayerTest.*IS=\(.*\..*\..*\..*\..*\).*eltwiseOpType=Pow.*secondaryInputType=CONSTANT.*)",
             // TODO: Issue: 40958
             R"(.*(ConstantResultSubgraphTest).*)",
+
+            // TODO: Issue: 41467 -- "unsupported element type f16 op Convert"
+            R"(.*(ConvertLayerTest).*targetPRC=FP16.*)",
+            // TODO: Issue: 41466 -- "Unsupported op 'ConvertLike'"
+            R"(.*(ConvertLikeLayerTest).*)",
+            // TODO: Issue: 41462
+            R"(.*(SoftMaxLayerTest).*axis=0.*)",
+            // TODO: Issue: 41461
+            R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
+            R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
     };
 }
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/multiply_add.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/multiply_add.cpp
new file mode 100644 (file)
index 0000000..81eece1
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "subgraph_tests/multiply_add.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {
+        {1, 3},
+        {1, 3, 2},
+        {1, 3, 2, 5},
+        {1, 3, 2, 5, 4},
+        {1, 3, 2, 2, 4, 5},
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_MultipleAdd_Nd, MultiplyAddLayerTest,
+                        ::testing::Combine(
+                                ::testing::ValuesIn(inputShapes),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                        MultiplyAddLayerTest::getTestCaseName);
+
+}  // namespace
index 9ef0b93..b071fdb 100644 (file)
@@ -75,7 +75,7 @@ void MatmulSqueezeAddTest::SetUp() {
 
     auto constant_2 = ngraph::builder::makeConstant<float>(ngPrc, { 1, inputShape[0], outputSize },
         generateFloatNumbers(0, 1, inputShape[0] * outputSize), false);
-    auto add_0 = std::make_shared<ngraph::op::Add>(unsqueeze_0, constant_2);
+    auto add_0 = std::make_shared<ngraph::op::v1::Add>(unsqueeze_0, constant_2);
 
     auto constant_3 = std::make_shared<ngraph::op::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 1 }, std::vector<size_t>{0});
     auto squeeze_0 = std::make_shared<ngraph::op::Squeeze>(add_0, constant_3);
index 5137db2..9ef0f21 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -124,8 +124,9 @@ struct prior_box : public primitive_base<prior_box> {
               const float offset,
               const std::vector<float>& widths,
               const std::vector<float>& heights,
+              data_types output_dt,
               const padding& output_padding = padding())
-        : primitive_base(id, {input}, output_padding),
+        : primitive_base(id, {input}, output_padding, optional_data_type{output_dt}),
           img_size(img_size),
           flip(false),
           clip(clip),
index 6117510..f3ef64c 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -41,19 +41,15 @@ struct tile : public primitive_base<tile> {
 
     /// @brief Constructs tile primitive.
     /// @param id This primitive id.
-    /// @param axis Tiling axis
-    /// @param tiles Tiles number across an axis
+    /// @param out_shape The shape of tiled tensor.
     tile(const primitive_id& id,
          const primitive_id& input,
-         const tile_axis axis,
-         const int tiles,
+         const tensor out_shape,
          const padding& output_padding = padding())
-        : primitive_base(id, {input}, output_padding), axis(axis), tiles(tiles) {}
+        : primitive_base(id, {input}, output_padding), out_shape(out_shape) {}
 
-    /// @brief Tiling axis
-    tile_axis axis;
-    /// @brief Tiles number across an axis
-    int tiles;
+    /// @brief Shape of the output tensor
+    tensor out_shape;
 };
 /// @}
 /// @}
index 2828374..fc06bd1 100644 (file)
@@ -354,17 +354,6 @@ enum class ConcatAxis {
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// TileAxis
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-enum class TileAxis {
-    X,
-    Y,
-    Z,
-    FEATURE,
-    BATCH,
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // DepthToSpaceMode
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 enum class DepthToSpaceMode {
index 09dc2c9..93caa66 100644 (file)
@@ -22,12 +22,16 @@ ParamsKey ReshapeKernelRef::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F16);
     k.EnableInputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
     k.EnableInputDataType(Datatype::INT32);
+    k.EnableInputDataType(Datatype::UINT32);
     k.EnableInputDataType(Datatype::INT64);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::INT32);
+    k.EnableOutputDataType(Datatype::UINT32);
     k.EnableOutputDataType(Datatype::INT64);
     k.EnableAllInputLayout();
     k.EnableAllOutputLayout();
index 5c3bbcc..b9ee090 100644 (file)
@@ -51,8 +51,12 @@ ParamsKey StridedSliceKernelRef::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::F16);
     k.EnableInputDataType(Datatype::F32);
+    k.EnableInputDataType(Datatype::INT32);
+    k.EnableInputDataType(Datatype::INT64);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::INT32);
+    k.EnableOutputDataType(Datatype::INT64);
     k.EnableAllInputLayout();
     k.EnableAllOutputLayout();
     k.EnableTensorOffset();
index de2a1e9..1563f1f 100644 (file)
 #include <string>
 
 namespace kernel_selector {
-static int32_t GetTileChannelIndex(const tile_params& params) {
-    Tensor::DataChannelName name = Tensor::DataChannelName::X;
-    switch (params.axis) {
-        case TileAxis::X:
-            name = Tensor::DataChannelName::X;
-            break;
-        case TileAxis::Y:
-            name = Tensor::DataChannelName::Y;
-            break;
-        case TileAxis::Z:
-            name = Tensor::DataChannelName::Z;
-            break;
-        case TileAxis::FEATURE:
-            name = Tensor::DataChannelName::FEATURE;
-            break;
-        case TileAxis::BATCH:
-            name = Tensor::DataChannelName::BATCH;
-            break;
-        default:
-            break;
-    }
-
-    return DataTensor::Channelndex(params.output.GetLayout(), name);
-}
 
 ParamsKey TileKernelRef::GetSupportedKey() const {
     ParamsKey k;
+    k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+    k.EnableInputDataType(Datatype::INT32);
     k.EnableInputDataType(Datatype::F16);
     k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::bfzyx);
     k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfwzyx);
+    k.EnableOutputLayout(DataLayout::bfwzyx);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
@@ -61,69 +45,16 @@ ParamsKey TileKernelRef::GetSupportedKey() const {
 CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const {
     CommonDispatchData dispatchData;
 
-    auto in = params.inputs[0];
-
-    size_t inner_size = 1;
-    size_t outer_size = 1;
-
-    const int32_t axis = GetTileChannelIndex(params);
-
-    for (int32_t i = 0; i <= axis; i++) {
-        inner_size *= in.GetDims()[i].v;
-    }
-
-    for (int32_t i = axis + 1; i < static_cast<int32_t>(in.GetDims().size()); i++) {
-        outer_size *= in.GetDims()[i].v;
-    }
-
-    if (inner_size > 1) {
-        dispatchData.gws[0] = outer_size;
-        dispatchData.gws[1] = inner_size;
-        dispatchData.gws[2] = 1;
-
-        dispatchData.lws[0] = 1;
-        dispatchData.lws[1] = 1;
-        dispatchData.lws[2] = 1;
-    } else {
-        dispatchData.gws[0] = Align(outer_size, 16);
-        dispatchData.gws[1] = 1;
-        dispatchData.gws[2] = 1;
+    auto out = params.output;
 
-        dispatchData.lws[0] = 16;
-        dispatchData.lws[1] = 1;
-        dispatchData.lws[2] = 1;
-    }
+    dispatchData.gws = {out.X().v * out.Y().v, out.Z().v * out.W().v, out.Batch().v * out.Feature().v};
+    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
 
     return dispatchData;
 }
 
 JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const {
     JitConstants jit = MakeBaseParamsJitConstants(params);
-
-    auto in = params.inputs[0];
-    auto out = params.output;
-
-    size_t inner_size = 1;
-    size_t outer_size = 1;
-    size_t axis_pitch = 1;
-
-    const int32_t axis = GetTileChannelIndex(params);
-
-    for (int32_t i = 0; i <= axis; i++) {
-        inner_size *= in.GetDims()[i].v;
-        axis_pitch *= in.GetDims()[i].LogicalDimPadded();
-    }
-    for (int32_t i = axis + 1; i < static_cast<int32_t>(in.GetDims().size()); i++) {
-        outer_size *= in.GetDims()[i].v;
-    }
-
-    jit.AddConstant(MakeJitConstant("TILES", params.tiles));
-    jit.AddConstant(MakeJitConstant("AXIS_PITCH", axis_pitch));
-    jit.AddConstant(MakeJitConstant("OUTER_SIZE", outer_size));
-    if (inner_size == 1) {
-        jit.AddConstant(MakeJitConstant("OUTPUT_ELEMENTS", out.LogicalSize()));
-        jit.AddConstant(MakeJitConstant("DENSE", 1));
-    }
     return jit;
 }
 
index 0650403..3eb5fbd 100644 (file)
@@ -21,10 +21,7 @@ namespace kernel_selector {
 // tile_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct tile_params : public base_params {
-    tile_params() : base_params(KernelType::TILE), axis(TileAxis::BATCH), tiles(0) {}
-
-    TileAxis axis;
-    int tiles;
+    tile_params() : base_params(KernelType::TILE) {}
 
     virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
 };
index 8f5a6d3..f07845a 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 #include "include/data_types.cl"
 
 
-KERNEL (reshape_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL (reshape_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
 {
     const uint d1 = get_global_id(0);
     const uint d2 = get_global_id(1);
@@ -38,7 +38,7 @@ KERNEL (reshape_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
     const uint od4 = linear % OUTPUT_SIZES[3]; linear /= OUTPUT_SIZES[3];
     const uint od5 = linear % OUTPUT_SIZES[4]; linear /= OUTPUT_SIZES[4];
     const uint od6 = linear % OUTPUT_SIZES[5]; linear /= OUTPUT_SIZES[5];
-    
+
     uint input_offset =  INPUT0_OFFSET +
                          d1*INPUT0_PITCHES[0] +
                          d2*INPUT0_PITCHES[1] +
@@ -53,6 +53,6 @@ KERNEL (reshape_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
                          od4*OUTPUT_PITCHES[3] +
                          od5*OUTPUT_PITCHES[4] +
                          od6*OUTPUT_PITCHES[5];
-    
-    output[output_offset] = ACTIVATION(input[input_offset], ACTIVATION_PARAMS);
+
+    output[output_offset] = ACTIVATION(TO_OUTPUT_TYPE(input[input_offset]), ACTIVATION_PARAMS);
 }
index 085837e..559b355 100644 (file)
@@ -15,7 +15,7 @@
 
 #include "include/include_all.cl"
 
-KERNEL(strided_slice_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
 {
     const uint batch = get_global_id(0);
     const uint feature = get_global_id(1);
index b837bdd..4d99d32 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 
 #include "include/common.cl"
 #include "include/data_types.cl"
+#include "include/fetch.cl"
 
-#if DENSE
-__attribute__((intel_reqd_sub_group_size(16)))
-__attribute__((reqd_work_group_size(16, 1, 1)))
-#endif
-KERNEL (tile_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL(tile_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
 {
-#if DENSE
+    const uint x = (uint)get_global_id(0) % OUTPUT_SIZE_X;
+    const uint y = (uint)get_global_id(0) / OUTPUT_SIZE_X;
+    const uint f = (uint)get_global_id(2) / OUTPUT_BATCH_NUM;
+    const uint b = (uint)get_global_id(2) % OUTPUT_BATCH_NUM;
+    #if OUTPUT_DIMS == 6
+    const uint z = (uint)get_global_id(1) % OUTPUT_SIZE_Z;
+    const uint w = (uint)get_global_id(1) / OUTPUT_SIZE_Z;
+    const uint out_offset = OUTPUT_GET_INDEX(b, f, w, z, y, x);
+    const uint in_offset = INPUT0_GET_INDEX_SAFE(b, f, w, z, y, x);
+    #elif OUTPUT_DIMS == 5
+    const uint z = (uint)get_global_id(1);
+    const uint out_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+    const uint in_offset = INPUT0_GET_INDEX_SAFE(b, f, z, y, x);
+    #elif OUTPUT_DIMS == 4
+    const uint out_offset = OUTPUT_GET_INDEX(b, f, y, x);
+    const uint in_offset = INPUT0_GET_INDEX_SAFE(b, f, y, x);
+    #endif
 
-    const uint id = get_global_id(0);
-    const uint group_id = id / 16;
-    const uint lid = get_local_id(0);
-    const uint idx = min((uint)(id), (uint)(OUTER_SIZE - 1));
-    UNIT_TYPE val = input[idx];
-
-    for (int t = 0; t < TILES; t++)
-    {
-        UNIT_TYPE save_val = intel_sub_group_shuffle(val, (t*16 + lid)/TILES);
-        int offset = group_id*16*TILES + t*16 + lid;
-        if (offset < OUTPUT_ELEMENTS)
-            output[offset] = save_val;
-    }
-#else
-    const uint outer_idx = get_global_id(0);
-    const uint inner_idx = get_global_id(1);
-    if (inner_idx >= AXIS_PITCH) return;
-
-    for (int t = 0; t < TILES; t++)
-    {
-        output[outer_idx*TILES*AXIS_PITCH + t*AXIS_PITCH + inner_idx] = input[outer_idx*AXIS_PITCH + inner_idx];
-    }
-#endif
+    output[out_offset] = input[in_offset];
 }
index acb26f7..cc73e2e 100644 (file)
@@ -120,50 +120,54 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa
     if (conf.empty())
         return jit;
 
-    for (auto& c : conf) {
-        std::string fused_ops;
-        std::string fused_ops_preload;
-        std::string fused_ops_calc;
-        std::string in_name = c.input_var_name;
-        Datatype in_type = c.input_dt;
-        bool can_all_use_preload = true;
-
-        for (size_t i = 0; i < params.fused_ops.size(); i++) {
-            auto fused_dep_codegen = FusedOpsCodeGenerator(params.fused_ops[i]);
-            std::string out_var;
-            Datatype out_type;
-            jit.Merge(fused_dep_codegen.MakeLoadJitConstants(c, params.output));
-            jit.Merge(fused_dep_codegen.MakeOpJitConstants(c, in_name, in_type, out_var, out_type));
-            in_name = out_var;
-            in_type = out_type;
-
-            bool can_use_preload = fused_dep_codegen.CanPreloadData(c);
-            can_all_use_preload &= can_use_preload;
-            bool can_preload_eltwise = true;
-            if (params.fused_ops[i].GetType() == FusedOpType::ELTWISE &&
-                c.load_type == FusedOpsConfiguration::LoadType::FEATURE_SHUFFLE)
-                can_preload_eltwise = false;
-            fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
-            fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
-            if (can_use_preload && can_preload_eltwise)
-                fused_ops_preload += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
-            if (c.allow_for_partial_preload && (!can_use_preload || !can_preload_eltwise))
-                fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
-            fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+    try {
+        for (auto& c : conf) {
+            std::string fused_ops;
+            std::string fused_ops_preload;
+            std::string fused_ops_calc;
+            std::string in_name = c.input_var_name;
+            Datatype in_type = c.input_dt;
+            bool can_all_use_preload = true;
+
+            for (size_t i = 0; i < params.fused_ops.size(); i++) {
+                auto fused_dep_codegen = FusedOpsCodeGenerator(params.fused_ops[i]);
+                std::string out_var;
+                Datatype out_type;
+                jit.Merge(fused_dep_codegen.MakeLoadJitConstants(c, params.output));
+                jit.Merge(fused_dep_codegen.MakeOpJitConstants(c, in_name, in_type, out_var, out_type));
+                in_name = out_var;
+                in_type = out_type;
+
+                bool can_use_preload = fused_dep_codegen.CanPreloadData(c);
+                can_all_use_preload &= can_use_preload;
+                bool can_preload_eltwise = true;
+                if (params.fused_ops[i].GetType() == FusedOpType::ELTWISE &&
+                    c.load_type == FusedOpsConfiguration::LoadType::FEATURE_SHUFFLE)
+                    can_preload_eltwise = false;
+                fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+                fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+                if (can_use_preload && can_preload_eltwise)
+                    fused_ops_preload += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+                if (c.allow_for_partial_preload && (!can_use_preload || !can_preload_eltwise))
+                    fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+                fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+            }
+
+            jit.AddConstant(MakeJitConstant("FUSED_OPS" + c.suffix, fused_ops));
+            jit.AddConstant(MakeJitConstant("FUSED_OPS_PRELOAD" + c.suffix, fused_ops_preload));
+            jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc));
+            jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, in_name));
+
+            bool can_any_use_preload = !fused_ops_preload.empty();
+            jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix,
+                can_all_use_preload || (c.allow_for_partial_preload && can_any_use_preload)));
         }
 
-        jit.AddConstant(MakeJitConstant("FUSED_OPS" + c.suffix, fused_ops));
-        jit.AddConstant(MakeJitConstant("FUSED_OPS_PRELOAD" + c.suffix, fused_ops_preload));
-        jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc));
-        jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, in_name));
-
-        bool can_any_use_preload = !fused_ops_preload.empty();
-        jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix,
-            can_all_use_preload || (c.allow_for_partial_preload && can_any_use_preload)));
+        jit.Merge(MakeFusedOpsDeclsJitConstants(params, conf));
+    } catch (std::exception& ex) {
+        throw std::runtime_error("Fused op code generation for node " + params.layerID + " failed with error: " + ex.what());
     }
 
-    jit.Merge(MakeFusedOpsDeclsJitConstants(params, conf));
-
     return jit;
 }
 
index c2a4998..ea1e3a9 100644 (file)
@@ -393,16 +393,6 @@ std::string toString(ConcatAxis a) {
     }
 }
 
-std::string toString(TileAxis a) {
-    switch (a) {
-        case TileAxis::X:       return "X";
-        case TileAxis::Y:       return "Y";
-        case TileAxis::FEATURE: return "FEATURE";
-        case TileAxis::BATCH:   return "BATCH";
-        default: return "";
-    }
-}
-
 std::string toString(GatherAxis a) {
     switch (a) {
         case GatherAxis::X:       return "X";
index ca781c3..2309583 100644 (file)
@@ -247,7 +247,6 @@ std::string toString(NormalizeMode mode);
 std::string toString(MVNMode mode);
 std::string toString(WeightsLayout layout);
 std::string toString(ConcatAxis a);
-std::string toString(TileAxis a);
 std::string toString(GatherAxis a);
 std::string toString(ScatterUpdateAxis a);
 std::string toString(ResampleType type);
index c63320d..5c1607b 100644 (file)
@@ -137,6 +137,7 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) {
     deconv_info.add("stride", strd.to_string());
     deconv_info.add("input offset", desc->input_offset.to_string());
     deconv_info.add("split", split);
+    deconv_info.add("groups", desc->groups);
     if (desc->with_output_size) {
         json_composite ud_out_size_info;
         ud_out_size_info.add("size", desc->output_size.to_string());
index 2c9b060..e8ce5b1 100644 (file)
@@ -121,14 +121,15 @@ namespace detail {
 
 attach_strided_slice_gpu::attach_strided_slice_gpu() {
     auto val_fw = strided_slice_gpu::create;
-    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                           val_fw);
-    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                           val_fw);
-    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                           val_fw);
-    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                           val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
 }
 
 }  // namespace detail
index 43b2f78..9f2545e 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,23 +27,6 @@ using namespace cldnn;
 namespace cldnn {
 namespace gpu {
 
-kernel_selector::tile_axis convert_axis(tile::tile_axis axis) {
-    switch (axis) {
-        case tile::along_x:
-            return kernel_selector::tile_axis::X;
-        case tile::along_y:
-            return kernel_selector::tile_axis::Y;
-        case tile::along_z:
-            return kernel_selector::tile_axis::Z;
-        case tile::along_f:
-            return kernel_selector::tile_axis::FEATURE;
-        case tile::along_b:
-            return kernel_selector::tile_axis::BATCH;
-        default:
-            return kernel_selector::tile_axis::X;
-    }
-}
-
 struct tile_gpu : typed_primitive_gpu_impl<tile> {
     using parent = typed_primitive_gpu_impl<tile>;
     using parent::parent;
@@ -54,9 +37,6 @@ public:
         auto tile_optional_params =
             get_default_optional_params<kernel_selector::tile_optional_params>(arg.get_program());
 
-        tile_params.axis = convert_axis(arg.get_primitive()->axis);
-        tile_params.tiles = arg.get_primitive()->tiles;
-
         auto& kernel_selector = kernel_selector::tile_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(tile_params, tile_optional_params);
 
@@ -76,10 +56,23 @@ namespace detail {
 attach_tile_gpu::attach_tile_gpu() {
     auto val_fw = tile_gpu::create;
 
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
     implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
     implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
 }
 
 }  // namespace detail
index fc1a657..b4fac62 100644 (file)
@@ -73,7 +73,6 @@ using softmax_dim = kernel_selector::SoftmaxDim;
 using mean_subtruct_mode = kernel_selector::MeanSubtractMode;
 using mean_op = kernel_selector::MeanOp;
 using concat_axis = kernel_selector::ConcatAxis;
-using tile_axis = kernel_selector::TileAxis;
 using tuning_mode = kernel_selector::TuningMode;
 using sample_type = kernel_selector::ResampleType;
 using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode;
index 4a75c30..c263a66 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -368,7 +368,7 @@ void network_impl::set_input_data(const primitive_id& id, memory_impl& data) {
     primitive_inst = find_primitive(id);
 
     if (primitive_inst == nullptr)
-        throw std::runtime_error("topology doesn't contain prmitive:" + id);
+        throw std::runtime_error("topology doesn't contain primitive:" + id);
 
     if (primitive_inst->type() != input_layout::type_id()) {
         CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input");
index 87bccda..87679f0 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -355,7 +355,7 @@ void prior_box_node::calc_result() {
     result = get_program().get_engine().allocate_memory(get_output_layout(), 0, false);
 
     // perform calculations
-    if (input().get_output_layout().data_type == data_types::f16)
+    if (get_output_layout().data_type == data_types::f16)
         calculate_prior_box_output<data_type_to_type<data_types::f16>::type>(*result,
                                                                              input().get_output_layout(),
                                                                              *typed_desc());
@@ -366,8 +366,6 @@ void prior_box_node::calc_result() {
 }
 
 layout prior_box_inst::calc_output_layout(prior_box_node const& node) {
-    assert(static_cast<bool>(node.get_primitive()->output_data_type) == false &&
-           "Output data type forcing is not supported for prior_box_node!");
     auto desc = node.get_primitive();
     auto input_layout = node.input().get_output_layout();
 
@@ -400,6 +398,8 @@ layout prior_box_inst::calc_output_layout(prior_box_node const& node) {
     // Second feature stores the variance of each prior coordinate.
 
     auto output_data_type = input_layout.data_type == data_types::f16 ? data_types::f16 : data_types::f32;
+    if (node.get_primitive()->output_data_type)
+        output_data_type = *node.get_primitive()->output_data_type;
     return {output_data_type, cldnn::format::bfyx, cldnn::tensor(1, 2, 1, layer_width * layer_height * num_priors * 4)};
 }
 
index f6a2821..1ac49b2 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ std::unique_ptr<json_composite> program_node::desc_to_json() const {
     std::unique_ptr<json_composite> node_info = std::unique_ptr<json_composite>(new json_composite());
     node_info->add("ptr", "node_" + std::to_string(reinterpret_cast<uintptr_t>(this)));
     node_info->add("id", id());
-    node_info->add("type", get_extr_type(typeid(*this).name()));
+    node_info->add("type", desc->type_string());
     node_info->add("internal", bool_to_str(this->is_type<internal_primitive>()));
     node_info->add("valid output layout", bool_to_str(valid_output_layout));
 
index 48c15e3..449a8f1 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,14 +34,7 @@ layout tile_inst::calc_output_layout(tile_node const& node) {
 
     auto input_layout = node.input().get_output_layout();
     auto input_format = input_layout.format;
-    auto result_sizes = input_layout.size.sizes();
-
-    auto axis_index = node.get_primitive()->axis;
-    auto tiles = node.get_primitive()->tiles;
-
-    // calculate sum of features from all inputs
-    result_sizes[axis_index] *= tiles;
-    return layout{input_layout.data_type, input_format, (tensor) result_sizes};
+    return layout{input_layout.data_type, input_format, desc->out_shape};
 }
 
 std::string tile_inst::to_string(tile_node const& node) {
@@ -53,9 +46,6 @@ std::string tile_inst::to_string(tile_node const& node) {
 
     json_composite tile_info;
     tile_info.add("input id", input.id());
-    tile_info.add("axis", desc->axis);
-    tile_info.add("tiles", desc->tiles);
-
     node_info->add("tile info", tile_info);
     node_info->dump(primitive_description);
 
index c5646a7..570253d 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -134,7 +134,7 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) {
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
     topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
-    topology.add(tile("tile", "reshape", tile::along_y, 4));
+    topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4)));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
     set_values(input, input_vec);
index 897df97..778c639 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -77,7 +77,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(tile("tile", "input", tile::along_b, 2));
+    topology.add(tile("tile", "input", tensor(2, 2, 2, 2)));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
                                      2.f, 0.f, 6.f, 5.2f };
@@ -106,7 +106,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(tile("tile", "input", tile::along_f, 2));
+    topology.add(tile("tile", "input", tensor(1, 4, 2, 2)));
 
     std::vector<float> input_vec = { 1.f, 0.f,
                                      5.f, 1.5f,
@@ -134,11 +134,11 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
-    auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
+    auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(tile("tile", "input", tile::along_y, 2));
+    topology.add(tile("tile", "input", tensor(1, 2, 2, 4)));
 
     std::vector<float> input_vec = { 1.f, 0.f,
                                      5.f, 1.5f,
@@ -166,11 +166,11 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
-    auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
+    auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(tile("tile", "input", tile::along_x, 2));
+    topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
 
     std::vector<float> input_vec = { 1.f, 0.f,
                                      5.f, 1.5f,
@@ -197,12 +197,12 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
 TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
     const auto& engine = get_test_engine();
 
-    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 1 } });
-    auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 2 } });
+    auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(tile("tile", "input", tile::along_x, 4));
+    topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f};
     set_values(input, input_vec);
@@ -230,7 +230,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(tile("tile", "input", tile::along_z, 2));
+    topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4)));
 
     std::vector<float> input_vec = {
         1.f, 0.f,
@@ -258,4 +258,3 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
         EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i;
     }
 }
-