inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp

   1 /*
   2 // Copyright (c) 2016-2018 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "proposal_inst.h"
  18 #include "kernel.h"
  19 #include "implementation_map.h"
  20 #include "network_impl.h"
  21 #include "engine_impl.h"
  22 #include "math_utils.h"
  23 #include "error_handler.h"
  24
  25 #include <algorithm>
  26 #include <string>
  27
  28 #define EPSILON 0.00001f
  29
  30 namespace cldnn { namespace gpu {
  31
  32 namespace {
  33
  34     /****************************************************************************
  35     *                                                                          *
  36     *                              Common Utils                                *
  37     *                                                                          *
  38     ****************************************************************************/
  39
  40     inline const float& clamp(const float & v, const float & lower, const float & upper)
  41     {
  42         return std::max(lower, std::min(v, upper));
  43     }
  44
  45     inline bool hasSingleBatchOutput(const program_node & node)
  46     {
  47         const auto & batch = node.get_output_layout().size.batch;
  48
  49         return batch.empty() || (batch.size() == 1 && batch[0] == 1);
  50     }
  51
  52     struct roi_t
  53     {
  54         float x0, y0, x1, y1;
  55
  56         inline float area() const
  57         {
  58             return std::max(0.f, y1 - y0 + 1.f) * std::max(0.f, x1 - x0 + 1.f);
  59         }
  60     };
  61
  62     struct delta_t { float shift_x, shift_y, log_w, log_h; };
  63
  64     struct proposal_t
  65     {
  66         proposal_t() = default;
  67         proposal_t(const roi_t& r, const float c, const size_t& o) : roi(r), confidence(c), ord(o) {}
  68
  69         roi_t roi;
  70         float confidence;
  71         size_t ord;
  72     };
  73
  74     inline float float_read_helper(const float* mem)
  75     {
  76         return *mem;
  77     }
  78
  79     inline float float_read_helper(const half_t* mem)
  80     {
  81         return float16_to_float32(*((uint16_t*)(mem)));
  82     }
  83
  84     inline void float_write_helper(float* mem, float f)
  85     {
  86         *mem = f;
  87     }
  88
  89     inline void float_write_helper(half_t* mem, float f)
  90     {
  91         *mem = (half_t)float32_to_float16(f);
  92     }
  93
  94     /****************************************************************************
  95      *                                                                          *
  96      *                              Impl Details                                *
  97      *                                                                          *
  98      ****************************************************************************/
  99
 100     void sort_and_keep_n_items(std::vector<proposal_t>& proposals, size_t n)
 101     {
 102         auto cmp_fn = [](const proposal_t& a, const proposal_t& b)
 103         {
 104             return (a.confidence > b.confidence);
 105         };
 106
 107         if (proposals.size() > n)
 108         {
 109             std::partial_sort(proposals.begin(), proposals.begin() + n, proposals.end(), cmp_fn);
 110             proposals.resize(n);
 111         }
 112         else
 113         {
 114             std::sort(proposals.begin(), proposals.end(), cmp_fn);
 115         }
 116     }
 117
 118     roi_t gen_bbox(
 119             const proposal_inst::anchor& box,
 120             const delta_t& delta,
 121             int anchor_shift_x,
 122             int anchor_shift_y,
 123             int img_w,
 124             int img_h,
 125             float coordinates_offset,
 126             bool initial_clip,
 127             bool clip_before_nms)
 128     {
 129         float x0 = box.start_x + anchor_shift_x;
 130         float y0 = box.start_y + anchor_shift_y;
 131         float x1 = box.end_x + anchor_shift_x;
 132         float y1 = box.end_y + anchor_shift_y;
 133
 134         if (initial_clip)
 135         {
 136             x0 = clamp(x0, 0.0f, static_cast<float>(img_w));
 137             y0 = clamp(y0, 0.0f, static_cast<float>(img_h));
 138             x1 = clamp(x1, 0.0f, static_cast<float>(img_w));
 139             y1 = clamp(y1, 0.0f, static_cast<float>(img_h));
 140         }
 141
 142         const float anchor_w = x1 - x0 + coordinates_offset;
 143         const float anchor_h = y1 - y0 + coordinates_offset;
 144         const float center_x = x0 + 0.5f * anchor_w;
 145         const float center_y = y0 + 0.5f * anchor_h;
 146
 147         const float pred_center_x = delta.shift_x * anchor_w + center_x;
 148         const float pred_center_y = delta.shift_y * anchor_h + center_y;
 149         const float half_pred_w = std::exp(delta.log_w) * anchor_w * .5f;
 150         const float half_pred_h = std::exp(delta.log_h) * anchor_h * .5f;
 151
 152         float new_x0 = pred_center_x - half_pred_w;
 153         float new_y0 = pred_center_y - half_pred_h;
 154         float new_x1 = pred_center_x + half_pred_w;
 155         float new_y1 = pred_center_y + half_pred_h;
 156
 157         if (clip_before_nms)
 158         {
 159             new_x0 = clamp(new_x0, 0.f, img_w - coordinates_offset);
 160             new_y0 = clamp(new_y0, 0.f, img_h - coordinates_offset);
 161             new_x1 = clamp(new_x1, 0.f, img_w - coordinates_offset);
 162             new_y1 = clamp(new_y1, 0.f, img_h - coordinates_offset);
 163         }
 164
 165         return { new_x0, new_y0, new_x1, new_y1 };
 166     }
 167
 168     std::vector<roi_t> perform_nms(
 169             const std::vector<proposal_t>& proposals,
 170             float iou_threshold,
 171             size_t top_n,
 172             float coordinates_offset)
 173     {
 174         std::vector<roi_t> res;
 175         res.reserve(top_n);
 176
 177         for (const auto & prop : proposals)
 178         {
 179             const roi_t& bbox = prop.roi;
 180
 181             bool overlaps = std::any_of(res.begin(), res.end(), [&](const roi_t& res_bbox)
 182             {
 183                 bool intersecting = (bbox.x0 < res_bbox.x1) & (res_bbox.x0 < bbox.x1) & (bbox.y0 < res_bbox.y1) & (res_bbox.y0 < bbox.y1);
 184                 float overlap = 0.0f;
 185                 if (intersecting)
 186                 {
 187                     const float x0 = std::max(bbox.x0, res_bbox.x0);
 188                     const float y0 = std::max(bbox.y0, res_bbox.y0);
 189                     const float x1 = std::min(bbox.x1, res_bbox.x1);
 190                     const float y1 = std::min(bbox.y1, res_bbox.y1);
 191
 192                     const float intersect_width = std::max(0.0f, x1 - x0 + coordinates_offset);
 193                     const float intersect_height = std::max(0.0f, y1 - y0 + coordinates_offset);
 194                     const float intersect_size = intersect_width * intersect_height;
 195
 196                     const float A_area = (bbox.x1 - bbox.x0 + coordinates_offset) * (bbox.y1 - bbox.y0 + coordinates_offset);
 197                     const float B_area = (res_bbox.x1 - res_bbox.x0 + coordinates_offset) * (res_bbox.y1 - res_bbox.y0 + coordinates_offset);
 198
 199                     overlap = intersect_size / (A_area + B_area - intersect_size);
 200                 }
 201                 return overlap > iou_threshold;
 202             });
 203
 204             if (!overlaps)
 205             {
 206                 res.push_back(bbox);
 207                 if (res.size() == top_n) break;
 208             }
 209         }
 210
 211         res.resize(top_n);
 212         return res;
 213     }
 214 } // anonymous namespace
 215
 216
 217 /****************************************************************************
 218 *                                                                          *
 219 *                              Proposal Layer                              *
 220 *                                                                          *
 221 ****************************************************************************/
 222
 223 struct proposal_gpu : typed_primitive_impl<proposal>
 224 {
 225     const proposal_node& outer;
 226
 227     proposal_gpu(const proposal_node& arg)
 228             : outer(arg)
 229     {}
 230
 231     template<typename dtype>
 232     void execute(proposal_inst& instance)
 233     {
 234         const std::vector<proposal_inst::anchor>& anchors = instance.get_anchors();
 235
 236         size_t anchors_num = anchors.size();
 237
 238         auto& cls_scores = instance.dep_memory(proposal_inst::cls_scores_index);
 239         auto& bbox_pred  = instance.dep_memory(proposal_inst::bbox_pred_index);
 240         auto& image_info = instance.dep_memory(proposal_inst::image_info_index);
 241
 242         // original input image to the graph (after possible scaling etc.) so that coordinates are valid for it
 243         mem_lock<dtype> image_info_ptr{ image_info };
 244         const dtype* image_info_mem = image_info_ptr.data();
 245
 246         int img_w = 1;
 247         int img_h = 1;
 248         int img_z = 1;
 249         int min_bbox_x = 1;
 250         int min_bbox_y = 1;
 251         int scaled_min_bbox_size = instance.argument.min_bbox_size;
 252
 253         bool swap_xy = instance.argument.swap_xy;
 254         bool initial_clip = instance.argument.initial_clip;
 255         bool clip_before_nms = instance.argument.clip_before_nms;
 256         bool clip_after_nms = instance.argument.clip_after_nms;
 257         float coordinates_offset = instance.argument.coordinates_offset;
 258         float box_coordinate_scale = instance.argument.box_coordinate_scale;
 259         float box_size_scale = instance.argument.box_size_scale;
 260
 261         if (image_info.get_layout().size.feature[0] == 4)
 262         {
 263             img_w = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_width_index) + EPSILON);
 264             img_h = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_height_index) + EPSILON);
 265             min_bbox_x = static_cast<int>(scaled_min_bbox_size * float_read_helper(image_info_mem + 3));
 266             min_bbox_y = static_cast<int>(scaled_min_bbox_size * float_read_helper(image_info_mem + 2));
 267         }
 268         else
 269         {
 270             img_w = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_width_index) + EPSILON);
 271             img_h = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_height_index) + EPSILON);
 272             img_z = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_depth_index) + EPSILON);
 273
 274             scaled_min_bbox_size *= img_z;
 275
 276             min_bbox_x = scaled_min_bbox_size;
 277             if (image_info.get_layout().size.feature[0] > proposal_inst::image_info_scale_min_bbox_x)
 278             {
 279                 min_bbox_x = static_cast<int>(min_bbox_x * float_read_helper(image_info_mem + proposal_inst::image_info_scale_min_bbox_x));
 280             }
 281
 282             min_bbox_y = scaled_min_bbox_size;
 283             if (image_info.get_layout().size.feature[0] > proposal_inst::image_info_scale_min_bbox_y)
 284             {
 285                 min_bbox_y = static_cast<int>(min_bbox_y * float_read_helper(image_info_mem + proposal_inst::image_info_scale_min_bbox_y));
 286             }
 287         }
 288
 289         if (swap_xy)
 290         {
 291             std::swap(img_w, img_h);
 292         }
 293
 294         // feat map sizes
 295         const auto& score_size = cls_scores.get_layout().size;
 296         int fm_h = score_size.spatial[1];
 297         int fm_w = score_size.spatial[0];
 298
 299         int fm_sz = fm_w * fm_h;
 300
 301         mem_lock<dtype> cls_scores_ptr{ cls_scores };
 302         mem_lock<dtype> bbox_pred_ptr{ bbox_pred };
 303         const dtype* cls_scores_mem = cls_scores_ptr.data();
 304         const dtype* bbox_pred_mem  = bbox_pred_ptr.data();
 305
 306         for (int n = 0; n < score_size.batch[0]; n++)
 307         {
 308             std::vector<proposal_t> sorted_proposals_confidence;
 309             size_t num_proposals = fm_h * fm_w * anchors_num;
 310             sorted_proposals_confidence.reserve(num_proposals);
 311             for (int y = 0; y < fm_h; ++y)
 312             {
 313                 for (int x = 0; x < fm_w; ++x)
 314                 {
 315                     const int anchor_shift_x = (swap_xy ? y : x) * instance.argument.feature_stride;
 316                     const int anchor_shift_y = (swap_xy ? x : y) * instance.argument.feature_stride;
 317                     const int location_index = y * fm_w + x;
 318
 319                     // we assume proposals are grouped by window location
 320                     for (unsigned int anchor_index = 0; anchor_index < anchors_num ; anchor_index++)
 321                     {
 322                         float dx0 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 0)) / box_coordinate_scale;
 323                         float dy0 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 1)) / box_coordinate_scale;
 324                         float dx1 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 2)) / box_size_scale;
 325                         float dy1 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 3)) / box_size_scale;
 326
 327                         delta_t bbox_delta { dx0, dy0, dx1, dy1 };
 328
 329                         const roi_t& roi = gen_bbox(anchors[anchor_index], bbox_delta, anchor_shift_x, anchor_shift_y,
 330                                                     img_w, img_h, coordinates_offset, initial_clip, clip_before_nms);
 331
 332                         int bbox_w = (int)(roi.x1 - roi.x0 + coordinates_offset);
 333                         int bbox_h = (int)(roi.y1 - roi.y0 + coordinates_offset);
 334
 335                         size_t scores_index = n*num_proposals * 2 + location_index + fm_sz * (anchor_index + anchors_num);
 336                         float proposal_confidence = (min_bbox_x <= bbox_w)* (min_bbox_y <= bbox_h) * float_read_helper(cls_scores_mem + scores_index);
 337                         sorted_proposals_confidence.emplace_back(roi, proposal_confidence, sorted_proposals_confidence.size());
 338                     }
 339                 }
 340             }
 341
 342             size_t pre_nms = std::min(instance.argument.pre_nms_topn, (int)sorted_proposals_confidence.size());
 343             sort_and_keep_n_items(sorted_proposals_confidence, pre_nms);
 344             std::vector<roi_t> res = perform_nms(sorted_proposals_confidence, instance.argument.iou_threshold,
 345                                                  instance.argument.post_nms_topn, coordinates_offset);
 346
 347             auto& output = instance.output_memory();
 348
 349             mem_lock<dtype> output_ptr{ output };
 350             dtype* top_data = output_ptr.data() + n*instance.argument.post_nms_topn*5;
 351
 352             size_t res_num_rois = res.size();
 353
 354
 355             for (size_t i = 0; i < res_num_rois; ++i)
 356             {
 357                 if (clip_after_nms)
 358                 {
 359                     res[i].x0 = clamp(res[i].x0, 0.0f, float(img_w));
 360                     res[i].y0 = clamp(res[i].y0, 0.0f, float(img_h));
 361                     res[i].x1 = clamp(res[i].x1, 0.0f, float(img_w));
 362                     res[i].y1 = clamp(res[i].y1, 0.0f, float(img_h));
 363                 }
 364
 365                 float_write_helper(top_data + 5 * i + 0, float(n));
 366                 float_write_helper(top_data + 5 * i + 1, res[i].x0 / (instance.argument.normalize ? img_w : 1.0f));
 367                 float_write_helper(top_data + 5 * i + 2, res[i].y0 / (instance.argument.normalize ? img_h : 1.0f));
 368                 float_write_helper(top_data + 5 * i + 3, res[i].x1 / (instance.argument.normalize ? img_w : 1.0f));
 369                 float_write_helper(top_data + 5 * i + 4, res[i].y1 / (instance.argument.normalize ? img_h : 1.0f));
 370             }
 371
 372             for (size_t i = res_num_rois; i < (size_t)instance.argument.post_nms_topn; i++)
 373             {
 374                 float_write_helper(top_data + 5*i + 0, -1.0f);
 375                 float_write_helper(top_data + 5*i + 1,  0.0f);
 376                 float_write_helper(top_data + 5*i + 2,  0.0f);
 377                 float_write_helper(top_data + 5*i + 3,  0.0f);
 378                 float_write_helper(top_data + 5*i + 4,  0.0f);
 379             }
 380         }
 381     }
 382
 383     event_impl::ptr execute_impl(const std::vector<event_impl::ptr>& events, proposal_inst& instance) override
 384     {
 385         for (auto& a : events)
 386         {
 387             a->wait();
 388         }
 389
 390         auto ev = instance.get_network().get_engine().create_user_event(false);
 391
 392         if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16)
 393         {
 394             execute<data_type_to_type<data_types::f16>::type>(instance);
 395         }
 396         else
 397         {
 398             execute<data_type_to_type<data_types::f32>::type>(instance);
 399         }
 400
 401         dynamic_cast<cldnn::user_event*>(ev.get())->set(); // set as complete
 402         return ev;
 403     }
 404
 405     static primitive_impl* create(const proposal_node& arg)
 406     {
 407         const layout & l = arg.image_info().get_output_layout();
 408         const size_t count = static_cast<size_t>(l.size.feature[0]);
 409
 410         //Supported image_info sizes and components meaning:
 411         // - image_info[3] = { img_height, img_width, img_depth }
 412         // - image_info[4] = { img_height, img_width, scale_min_bbox_y, scale_min_bbox_x }
 413         // - image_info[6] = { img_height, img_width, img_depth, scale_min_bbox_y, scale_min_bbox_x, scale_depth_index }
 414         if (count != 3 && count != 4 && count != 6) {
 415             CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items");
 416         }
 417
 418         return new proposal_gpu(arg);
 419     }
 420 };
 421
 422 namespace {
 423     struct attach {
 424         attach()
 425         {
 426             implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), proposal_gpu::create);
 427             implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), proposal_gpu::create);
 428         }
 429
 430         ~attach() {}
 431     };
 432     attach attach_impl;
 433 }
 434 } }