[IE CLDNN] Mixed mode support for proposal primitive (#1857)
author Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Thu, 27 Aug 2020 08:43:24 +0000 (11:43 +0300)
committer GitHub <noreply@github.com>
Thu, 27 Aug 2020 08:43:24 +0000 (11:43 +0300)
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/proposal.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/proposal.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/proposal.cpp
new file mode 100644 (file)
index 0000000..4c2369b
--- /dev/null
@@ -0,0 +1,49 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/proposal.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace ngraph::helpers;
+using namespace LayerTestsDefinitions;
+
+namespace {  // file-local: parameter lists and the GPU instantiation of ProposalLayerTest
+
+/* ============= Proposal ============= */
+const std::vector<base_size_type> base_size_ = {16};  // one value list per Proposal parameter; the *_type aliases are declared outside this diff
+const std::vector<pre_nms_topn_type> pre_nms_topn_ = {100};
+const std::vector<post_nms_topn_type> post_nms_topn_ = {100};
+const std::vector<nms_thresh_type> nms_thresh_ = {0.7f};
+const std::vector<min_size_type> min_size_ = {1};
+const std::vector<ratio_type> ratio_ = {{1.0f, 2.0f}};  // NOTE(review): presumably a single entry holding two ratios - confirm against ratio_type
+const std::vector<scale_type> scale_ = {{1.2f, 1.5f}};  // NOTE(review): presumably a single entry holding two scales - confirm against scale_type
+const std::vector<clip_before_nms_type> clip_before_nms_ = {false};
+const std::vector<clip_after_nms_type> clip_after_nms_ = {false};
+
+// empty string corresponds to Caffe framework
+const std::vector<framework_type> framework_ = {""};
+
+const auto proposalParams = ::testing::Combine(  // cartesian product of the ten value lists above
+        ::testing::ValuesIn(base_size_),
+        ::testing::ValuesIn(pre_nms_topn_),
+        ::testing::ValuesIn(post_nms_topn_),
+        ::testing::ValuesIn(nms_thresh_),
+        ::testing::ValuesIn(min_size_),
+        ::testing::ValuesIn(ratio_),
+        ::testing::ValuesIn(scale_),
+        ::testing::ValuesIn(clip_before_nms_),
+        ::testing::ValuesIn(clip_after_nms_),
+        ::testing::ValuesIn(framework_)
+);
+
+INSTANTIATE_TEST_CASE_P(Proposal_tests, ProposalLayerTest,  // instantiation prefix, then the parameterized fixture
+                        ::testing::Combine(
+                                proposalParams,
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // pair every parameter set with the GPU device only
+                        ProposalLayerTest::getTestCaseName  // supplies the per-case test name
+);
+
+}  // namespace
index 6223dfa..82e3f7c 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -196,44 +196,39 @@ std::vector<roi_t> perform_nms(const std::vector<proposal_t>& proposals,
  *                                                                          *
  ****************************************************************************/
 
+struct im_info_t {  // image-info values filled by read_image_info() and passed by value to execute()
+    int img_w;       // image width; used in execute() to clamp x0/x1 and as the x divisor when normalize is set
+    int img_h;       // image height; used in execute() to clamp y0/y1 and as the y divisor when normalize is set
+    int img_z;       // copied out by read_image_info(); not read by the execute() code visible in this diff
+    int min_bbox_x;  // a proposal whose bbox_w is below this gets confidence 0 in execute()
+    int min_bbox_y;  // a proposal whose bbox_h is below this gets confidence 0 in execute()
+};
+
 struct proposal_gpu : typed_primitive_impl<proposal> {
     const proposal_node& outer;
 
     explicit proposal_gpu(const proposal_node& arg) : outer(arg) {}
 
     template <typename dtype>
-    void execute(proposal_inst& instance, dtype* proposal_prob_ptr = nullptr) {
-        const std::vector<proposal_inst::anchor>& anchors = instance.get_anchors();
-
-        size_t anchors_num = anchors.size();
-
-        auto& cls_scores = instance.dep_memory(proposal_inst::cls_scores_index);
-        auto& bbox_pred = instance.dep_memory(proposal_inst::bbox_pred_index);
+    void read_image_info(proposal_inst& instance, im_info_t& im_info) {
         auto& image_info = instance.dep_memory(proposal_inst::image_info_index);
-
-        // original input image to the graph (after possible scaling etc.) so that coordinates are valid for it
         mem_lock<dtype> image_info_ptr{image_info};
         const dtype* image_info_mem = image_info_ptr.data();
 
+        bool swap_xy = instance.argument.swap_xy;
+
+        // original input image to the graph (after possible scaling etc.) so that coordinates are valid for it
         int img_w = 1;
         int img_h = 1;
         int img_z = 1;
         int min_bbox_x = 1;
         int min_bbox_y = 1;
-        int scaled_min_bbox_size = instance.argument.min_bbox_size;
-
-        bool swap_xy = instance.argument.swap_xy;
-        bool initial_clip = instance.argument.initial_clip;
-        bool clip_before_nms = instance.argument.clip_before_nms;
-        bool clip_after_nms = instance.argument.clip_after_nms;
-        float coordinates_offset = instance.argument.coordinates_offset;
-        float box_coordinate_scale = instance.argument.box_coordinate_scale;
-        float box_size_scale = instance.argument.box_size_scale;
-        bool for_deformable = instance.argument.for_deformable;
 
         auto image_info_size = image_info.get_layout().size;
         auto image_info_count = image_info_size.feature[0] == 1 ? image_info_size.batch[0] : image_info_size.feature[0];
 
+        int scaled_min_bbox_size = instance.argument.min_bbox_size;
+
         if (image_info_count == 4) {
             img_w =
                 static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_width_index) + EPSILON);
@@ -268,6 +263,31 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
             std::swap(img_w, img_h);
         }
 
+        im_info.img_h = img_h;
+        im_info.img_w = img_w;
+        im_info.img_z = img_z;
+        im_info.min_bbox_x = min_bbox_x;
+        im_info.min_bbox_y = min_bbox_y;
+    }
+
+    template <typename dtype>
+    void execute(proposal_inst& instance, im_info_t im_info, dtype* proposal_prob_ptr = nullptr) {
+        const std::vector<proposal_inst::anchor>& anchors = instance.get_anchors();
+
+        size_t anchors_num = anchors.size();
+
+        auto& cls_scores = instance.dep_memory(proposal_inst::cls_scores_index);
+        auto& bbox_pred = instance.dep_memory(proposal_inst::bbox_pred_index);
+
+        bool swap_xy = instance.argument.swap_xy;
+        bool initial_clip = instance.argument.initial_clip;
+        bool clip_before_nms = instance.argument.clip_before_nms;
+        bool clip_after_nms = instance.argument.clip_after_nms;
+        float coordinates_offset = instance.argument.coordinates_offset;
+        float box_coordinate_scale = instance.argument.box_coordinate_scale;
+        float box_size_scale = instance.argument.box_size_scale;
+        bool for_deformable = instance.argument.for_deformable;
+
         // feat map sizes
         const auto& score_size = cls_scores.get_layout().size;
         int fm_h = score_size.spatial[1];
@@ -311,8 +331,8 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
                                                     bbox_delta,
                                                     anchor_shift_x,
                                                     anchor_shift_y,
-                                                    img_w,
-                                                    img_h,
+                                                    im_info.img_w,
+                                                    im_info.img_h,
                                                     coordinates_offset,
                                                     initial_clip,
                                                     clip_before_nms,
@@ -323,7 +343,7 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
 
                         size_t scores_index =
                             n * num_proposals * 2 + location_index + fm_sz * (anchor_index + anchors_num);
-                        float proposal_confidence = (min_bbox_x <= bbox_w) * (min_bbox_y <= bbox_h) *
+                        float proposal_confidence = (im_info.min_bbox_x <= bbox_w) * (im_info.min_bbox_y <= bbox_h) *
                                                     float_read_helper(cls_scores_mem + scores_index);
                         sorted_proposals_confidence.emplace_back(roi,
                                                                  proposal_confidence,
@@ -350,17 +370,17 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
 
             for (size_t i = 0; i < res_num_rois; ++i) {
                 if (clip_after_nms) {
-                    res[i].x0 = clamp(res[i].x0, 0.0f, static_cast<float>(img_w));
-                    res[i].y0 = clamp(res[i].y0, 0.0f, static_cast<float>(img_h));
-                    res[i].x1 = clamp(res[i].x1, 0.0f, static_cast<float>(img_w));
-                    res[i].y1 = clamp(res[i].y1, 0.0f, static_cast<float>(img_h));
+                    res[i].x0 = clamp(res[i].x0, 0.0f, static_cast<float>(im_info.img_w));
+                    res[i].y0 = clamp(res[i].y0, 0.0f, static_cast<float>(im_info.img_h));
+                    res[i].x1 = clamp(res[i].x1, 0.0f, static_cast<float>(im_info.img_w));
+                    res[i].y1 = clamp(res[i].y1, 0.0f, static_cast<float>(im_info.img_h));
                 }
 
                 float_write_helper(top_data + 5 * i + 0, static_cast<float>(n));
-                float_write_helper(top_data + 5 * i + 1, res[i].x0 / (instance.argument.normalize ? img_w : 1.0f));
-                float_write_helper(top_data + 5 * i + 2, res[i].y0 / (instance.argument.normalize ? img_h : 1.0f));
-                float_write_helper(top_data + 5 * i + 3, res[i].x1 / (instance.argument.normalize ? img_w : 1.0f));
-                float_write_helper(top_data + 5 * i + 4, res[i].y1 / (instance.argument.normalize ? img_h : 1.0f));
+                float_write_helper(top_data + 5 * i + 1, res[i].x0 / (instance.argument.normalize ? im_info.img_w : 1.0f));
+                float_write_helper(top_data + 5 * i + 2, res[i].y0 / (instance.argument.normalize ? im_info.img_h : 1.0f));
+                float_write_helper(top_data + 5 * i + 3, res[i].x1 / (instance.argument.normalize ? im_info.img_w : 1.0f));
+                float_write_helper(top_data + 5 * i + 4, res[i].y1 / (instance.argument.normalize ? im_info.img_h : 1.0f));
                 if (top_data_prob != nullptr && i < sorted_proposals_confidence.size()) {
                     float_write_helper(top_data_prob + i, sorted_proposals_confidence[i].confidence);
                 }
@@ -384,21 +404,31 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
         }
 
         auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false);
+        im_info_t im_info;
+        if (instance.dep_memory(proposal_inst::image_info_index).get_layout().data_type == data_types::f16) {
+            read_image_info<data_type_to_type<data_types::f16>::type>(instance, im_info);
+        } else {
+            read_image_info<data_type_to_type<data_types::f32>::type>(instance, im_info);
+        }
+
+        if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type !=
+            instance.dep_memory(proposal_inst::bbox_pred_index).get_layout().data_type)
+            throw std::runtime_error("clDNN: proposal primitive doesn't support mixed bbox and scores types");
 
         if (instance.dependencies().size() == 4) {
             auto &proposal_probabilities = instance.dep_memory(proposal_inst::proposal_probabilities_out);
             if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) {
                 mem_lock<data_type_to_type<data_types::f16>::type> proposal_prob_ptr{proposal_probabilities};
-                execute<data_type_to_type<data_types::f16>::type>(instance, proposal_prob_ptr.data());
+                execute<data_type_to_type<data_types::f16>::type>(instance, im_info, proposal_prob_ptr.data());
             } else {
                 mem_lock<data_type_to_type<data_types::f32>::type> proposal_prob_ptr{proposal_probabilities};
-                execute<data_type_to_type<data_types::f32>::type>(instance, proposal_prob_ptr.data());
+                execute<data_type_to_type<data_types::f32>::type>(instance, im_info, proposal_prob_ptr.data());
             }
         } else {
             if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) {
-                execute<data_type_to_type<data_types::f16>::type>(instance);
+                execute<data_type_to_type<data_types::f16>::type>(instance, im_info);
             } else {
-                execute<data_type_to_type<data_types::f32>::type>(instance);
+                execute<data_type_to_type<data_types::f32>::type>(instance, im_info);
             }
         }
 
index c41af67..88b3180 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -71,13 +71,13 @@ const int image_z = 1;
 const std::vector<float> ratios = { 0.5f, 1.0f, 2.0f };
 const std::vector<float> scales = { 8.0f, 16.0f, 32.0f };
 
-template <typename Dtype>
+template <typename Dtype, typename ImInfoType = Dtype>
 class TestRunnerProposal
 {
     public:
         explicit TestRunnerProposal(cldnn::tensor image_info_size);
 
-        memory Run(std::vector<Dtype>& data, 
+        memory Run(std::vector<Dtype>& data,
                    std::vector<Dtype>& rois);
 
     private:
@@ -90,13 +90,13 @@ class TestRunnerProposal
         std::unique_ptr<network> _network;
 };
 
-template <typename Dtype>
-TestRunnerProposal<Dtype>::TestRunnerProposal(cldnn::tensor image_info_size) :
+template <typename Dtype, typename ImInfoType>
+TestRunnerProposal<Dtype, ImInfoType>::TestRunnerProposal(cldnn::tensor image_info_size) :
                             _cls_scores_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 18, 23, 14 } ),
                             _bbox_pred_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 36, 23, 14 } ),
-                            _image_info_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, image_info_size),
-                            _test_layer(layer_name, 
-                                        cls_scores_name, 
+                            _image_info_layout(cldnn::type_to_data_type<ImInfoType>::value, format::bfyx, image_info_size),
+                            _test_layer(layer_name,
+                                        cls_scores_name,
                                         bbox_pred_name,
                                         image_info_name,
                                         max_proposals,
@@ -108,7 +108,7 @@ TestRunnerProposal<Dtype>::TestRunnerProposal(cldnn::tensor image_info_size) :
                                         ratios,
                                         scales,
                                         padding())
-{    
+{
     _topology.add(input_layout(cls_scores_name, _cls_scores_layout));
     _topology.add(input_layout(bbox_pred_name, _bbox_pred_layout));
     _topology.add(input_layout(image_info_name, _image_info_layout));
@@ -118,26 +118,26 @@ TestRunnerProposal<Dtype>::TestRunnerProposal(cldnn::tensor image_info_size) :
     _network.reset(new network(_engine, _topology));
 }
 
-template <typename Dtype>
-memory TestRunnerProposal<Dtype>::Run(std::vector<Dtype>& cls_scores_vals,
-                                      std::vector<Dtype>& bbox_pred_vals)
+template <typename Dtype, typename ImInfoType>
+memory TestRunnerProposal<Dtype, ImInfoType>::Run(std::vector<Dtype>& cls_scores_vals,
+                                                  std::vector<Dtype>& bbox_pred_vals)
 {
     memory cls_scores = memory::attach(_cls_scores_layout, cls_scores_vals.data(), cls_scores_vals.size());
     memory bbox_pred  = memory::attach(_bbox_pred_layout, bbox_pred_vals.data(), bbox_pred_vals.size());
 
-    std::vector<Dtype> image_info_vals = { (Dtype)((float)image_h - 0.0000001f), // check fp robustness of the layer
-                                           (Dtype)((float)image_w + 0.0000001f), // check fp robustness of the layer 
-                                           (Dtype)((float)image_z) };
+    std::vector<ImInfoType> image_info_vals = { (ImInfoType)((float)image_h - 0.0000001f), // check fp robustness of the layer
+                                                (ImInfoType)((float)image_w + 0.0000001f), // check fp robustness of the layer
+                                                (ImInfoType)((float)image_z) };
     memory image_info = memory::allocate(_engine, _image_info_layout);
     tests::set_values(image_info, image_info_vals);
-   
+
     _network->set_input_data(cls_scores_name, cls_scores);
     _network->set_input_data(bbox_pred_name, bbox_pred);
     _network->set_input_data(image_info_name, image_info);
 
     std::map<primitive_id, network_output> network_output = _network->execute();
     EXPECT_EQ(network_output.begin()->first, layer_name);
-    return network_output.at(layer_name).get_memory();    
+    return network_output.at(layer_name).get_memory();
 }
 
 TEST(proposal, basic) {
@@ -159,7 +159,7 @@ TEST(proposal, basic) {
 TEST(proposal, fp16) {
     std::vector<FLOAT16> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
     std::vector<FLOAT16> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);
-    
+
     TestRunnerProposal<FLOAT16> t({ 1, 3, 1, 1 });
 
     const memory& output = t.Run(cls_scores, bbox_pred);
@@ -173,6 +173,40 @@ TEST(proposal, fp16) {
     }
 }
 
+TEST(proposal, scores_fp16_im_info_fp32) {  // mixed precision: FLOAT16 scores and bbox predictions with float image info
+    std::vector<FLOAT16> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
+    std::vector<FLOAT16> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);
+
+    TestRunnerProposal<FLOAT16, float> t({ 1, 3, 1, 1 });  // second template argument is the image-info element type; the tensor is the image-info size
+
+    const memory& output = t.Run(cls_scores, bbox_pred);
+    ASSERT_EQ(output.get_layout().count(), proposal_ref_size);  // stop here on a size mismatch so the loop below stays in bounds
+
+    auto d = output.pointer<FLOAT16>();  // output is read back with the scores' element type
+
+    for (size_t i = 0; i < proposal_ref_size; i++) {
+        FLOAT16 ref(proposal_ref[i]);  // convert the reference value to FLOAT16 before comparing
+        EXPECT_NEAR((float)d[i], (float)ref, epsilon_fp16);
+    }
+}
+
+TEST(proposal, scores_fp32_im_info_fp16) {  // mixed precision: float scores and bbox predictions with FLOAT16 image info
+    std::vector<float> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
+    std::vector<float> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);
+
+    TestRunnerProposal<float, FLOAT16> t({ 1, 3, 1, 1 });  // second template argument is the image-info element type; the tensor is the image-info size
+
+    const memory& output = t.Run(cls_scores, bbox_pred);
+    ASSERT_EQ(output.get_layout().count(), proposal_ref_size);  // stop here on a size mismatch so the loop below stays in bounds
+
+    auto d = output.pointer<float>();  // output is read back with the scores' element type
+
+    for (size_t i = 0; i < proposal_ref_size; i++) {
+        float ref(proposal_ref[i]);  // reference value, compared with the epsilon tolerance
+        EXPECT_NEAR((float)d[i], (float)ref, epsilon);
+    }
+}
+
 TEST(proposal, img_info_batched) {
     std::vector<float> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
     std::vector<float> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);