Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / detection_output.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "detection_output_inst.h"
18 #include "primitive_type_base.h"
19 #include "network_impl.h"
20 #include "error_handler.h"
21 #include "json_object.h"
22
23 namespace cldnn
24 {
// Returns the singleton type-id object used to register/identify the
// detection_output primitive inside the clDNN primitive type system.
primitive_type_id detection_output_type_id()
{
    static primitive_type_base<detection_output> instance;
    return &instance;
}
30
// Computes the output layout for a detection_output node.
// Expects exactly 3 inputs: location predictions, confidence scores and
// prior boxes. Returns a bfyx layout: a flat intermediate buffer on the
// GPU path, or the final [keep_top_k * batch] x DETECTION_OUTPUT_ROW_SIZE
// result buffer otherwise.
layout detection_output_inst::calc_output_layout(detection_output_node const& node)
{
    assert((bool)node.get_primitive()->output_data_type == false
           && "Output data type forcing is not supported for "
              "detection_output_node!");
    // Exactly three dependencies are required (location, confidence, prior box).
    CLDNN_ERROR_NOT_EQUAL(node.id(), "Detection output layer input number", node.get_dependencies().size(), "expected number of inputs", static_cast<size_t>(3), "");

    auto input_layout = node.location().get_output_layout();

    // Batch size and feature size are 1.
    // Number of bounding boxes to be kept is set to keep_top_k*batch size.
    // If number of detections is lower than top_k, will write dummy results at the end with image_id=-1.
    // Each row is a 7 dimension vector, which stores:
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    int output_size = (int)input_layout.get_linear_size() / PRIOR_BOX_SIZE;
    int num_classes = node.get_primitive()->num_classes;

    if (node.get_primitive()->share_location)
    {
        // NOTE(review): presumably the background class (when its label is 0)
        // contributes no boxes, so it is dropped from the class count — confirm
        // against the kernel implementation.
        num_classes = (node.get_primitive()->background_label_id == 0) ? node.get_primitive()->num_classes - 1 : node.get_primitive()->num_classes;
        output_size *= num_classes;
    }

    if (node.get_primitive()->top_k != -1)
    {
        // top_k caps candidate boxes per class per image; shrink the buffer
        // only if the cap is smaller than the uncapped size.
        int top_k = node.get_primitive()->top_k * num_classes * input_layout.size.batch[0];
        if (top_k < output_size)
        {
            output_size = top_k;
        }
    }

    output_size *= DETECTION_OUTPUT_ROW_SIZE;
    // Add space for number of output results per image - needed in the next detection output step
    output_size += ((input_layout.size.batch[0] + 15) / 16) * 16;  // batch count rounded up to a multiple of 16

    if (node.get_program().get_options().get<build_option_type::detection_output_gpu>()->enabled())
    {
        // GPU path: flat intermediate buffer, consumed by detection_output_sort.
        return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, 1, output_size) };
    }
    else
    {
        // CPU path: final results, one DETECTION_OUTPUT_ROW_SIZE row per kept box.
        return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, node.get_primitive()->keep_top_k * input_layout.size.batch[0]) };
    }
}
76
77 std::string detection_output_inst::to_string(detection_output_node const& node)
78 {
79     auto node_info           = node.desc_to_json();
80     auto desc                = node.get_primitive();
81     auto share_location      = desc->share_location ? "true" : "false";
82     auto variance_encoded    = desc->variance_encoded_in_target ? "true" : "false";
83     auto prior_is_normalized = desc->prior_is_normalized ? "true" : "false";
84     auto decrease_label_id   = desc->decrease_label_id ? "true" : "false";
85     auto clip_before_nms     = desc->clip_before_nms ? "true" : "false";
86     auto clip_after_nms      = desc->clip_after_nms ? "true" : "false";
87     auto& input_location     = node.location();
88     auto& input_prior_box    = node.prior_box();
89     auto& input_confidence   = node.confidence();
90
91
92     std::stringstream primitive_description;
93     std::string       str_code_type;
94
95     switch (desc->code_type)
96     {
97     case prior_box_code_type::corner:
98         str_code_type = "corner";
99         break;
100     case prior_box_code_type::center_size:
101         str_code_type = "center size";
102         break;
103     case prior_box_code_type::corner_size:
104         str_code_type = "corner size";
105         break;
106     default:
107         str_code_type = "not supported code type";
108         break;
109     }
110
111     json_composite detec_out_info;
112     detec_out_info.add("input location id", input_location.id());
113     detec_out_info.add("input confidence id", input_confidence.id());
114     detec_out_info.add("input prior box id", input_prior_box.id());
115     detec_out_info.add("num_classes:", desc->num_classes);
116     detec_out_info.add("keep_top_k", desc->keep_top_k);
117     detec_out_info.add("share_location", share_location);
118     detec_out_info.add("background_label_id", desc->background_label_id);
119     detec_out_info.add("nms_treshold", desc->nms_threshold);
120     detec_out_info.add("top_k", desc->top_k);
121     detec_out_info.add("eta", desc->eta);
122     detec_out_info.add("code_type", str_code_type);
123     detec_out_info.add("variance_encoded", variance_encoded);
124     detec_out_info.add("confidence_threshold", desc->confidence_threshold);
125     detec_out_info.add("prior_info_size", desc->prior_info_size);
126     detec_out_info.add("prior_coordinates_offset", desc->prior_coordinates_offset);
127     detec_out_info.add("prior_is_normalized", prior_is_normalized);
128     detec_out_info.add("input_width", desc->input_width);
129     detec_out_info.add("input_height", desc->input_height);
130     detec_out_info.add("decrease_label_id", decrease_label_id);
131     detec_out_info.add("clip_before_nms", clip_before_nms);
132     detec_out_info.add("clip_after_nms", clip_after_nms);
133     detec_out_info.dump(primitive_description);
134
135     node_info->add("dection output info", detec_out_info);
136     node_info->dump(primitive_description);
137
138     return primitive_description.str();
139 }
140
141 detection_output_inst::typed_primitive_inst(network_impl& network, detection_output_node const& node)
142     :parent(network, node)
143 {
144     auto location_layout = node.location().get_output_layout();
145     auto confidence_layout = node.confidence().get_output_layout();
146     auto prior_box_layout = node.prior_box().get_output_layout();
147     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Location memory format", location_layout.format.value, "expected bfyx input format", format::bfyx );
148     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Confidence memory format", confidence_layout.format.value, "expected bfyx input format", format::bfyx );
149     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Prior box memory format", prior_box_layout.format.value, "expected bfyx input format", format::bfyx );
150
151     tensor location_size = location_layout.size;
152     CLDNN_ERROR_NOT_EQUAL(node.id(), "Location input dimensions", (location_size.feature[0] * location_size.batch[0]), "detection output layer dimensions", (int)location_layout.count(), "Location input/ detection output dims mismatch");
153
154     tensor confidence_size = confidence_layout.size;
155     CLDNN_ERROR_NOT_EQUAL(node.id(), "Confidence input dimensions", (confidence_size.feature[0] * confidence_size.batch[0]), "detection output layer dimensions", (int)confidence_layout.count(), "Confidence input/detection output dims mistmach");
156
157     CLDNN_ERROR_NOT_EQUAL(node.id(), "Confidence batch size", confidence_size.batch[0], "location input batch size", location_size.batch[0], "Batch sizes mismatch.");
158
159     auto desc              = node.get_primitive();
160     int prior_feature_size = desc->variance_encoded_in_target ? 1 : 2;
161     tensor prior_box_size = prior_box_layout.size;
162     CLDNN_ERROR_NOT_EQUAL(node.id(), "Prior box spatial X", prior_box_size.spatial[0], "expected value", 1, "");
163     CLDNN_ERROR_NOT_EQUAL(node.id(), "Prior box feature size", prior_box_size.feature[0], "expected value", prior_feature_size, "");
164
165     CLDNN_ERROR_BOOL(node.id(), "Detection output layer padding", node.is_padded(), "Detection output layer doesn't support output padding.");
166     CLDNN_ERROR_BOOL(node.id(), "Detection output layer Prior-box input padding", node.get_dependency(2).is_padded(), "Detection output layer doesn't support input padding in Prior-Box input");
167 }
168
169 /************************ Detection Output keep_top_k part ************************/
170
// Returns the singleton type-id object used to register/identify the
// detection_output_sort primitive (the keep_top_k second stage).
primitive_type_id detection_output_sort_type_id()
{
    static primitive_type_base<detection_output_sort> instance;
    return &instance;
}
176
// Computes the output layout for a detection_output_sort node (the
// keep_top_k stage). Expects exactly 1 input. When num_images is 0 the node
// is assumed to be chained after a detection_output node and the sizing
// parameters are pulled from that producer instead of the primitive itself.
layout detection_output_sort_inst::calc_output_layout(detection_output_sort_node const& node)
{
    assert((bool)node.get_primitive()->output_data_type == false
           && "Output data type forcing is not supported for "
              "detection_output_sort_node!");
    CLDNN_ERROR_NOT_EQUAL(node.id(), "Detection output layer input number", node.get_dependencies().size(), "expected number of inputs", static_cast<size_t>(1), "");

    auto input_layout = node.input().get_output_layout();
    int keep_top_k = node.as<detection_output_sort>().get_primitive()->keep_top_k;
    int num_images = node.as<detection_output_sort>().get_primitive()->num_images;

    // If detection output sort is used as a second part of detection output get proper info from detection output node
    if (num_images == 0)
    {
        CLDNN_ERROR_BOOL(node.id(), "node.get_dependency(0).is_type<detection_output>()", !node.get_dependency(0).is_type<detection_output>(), "Cannot calculate output layout.");
        // Use the producer's location layout and keep_top_k; batch gives the image count.
        input_layout = node.get_dependency(0).as<detection_output>().location().get_output_layout();
        keep_top_k = node.get_dependency(0).as<detection_output>().get_primitive()->keep_top_k;
        num_images = input_layout.size.batch[0];
    }
    // Batch size and feature size are 1.
    // Number of bounding boxes to be kept is set to keep_top_k*batch size.
    // If number of detections is lower than keep_top_k, will write dummy results at the end with image_id=-1.
    // Each row is a 7 dimension vector, which stores:
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, keep_top_k * num_images) };
}
203
204 std::string detection_output_sort_inst::to_string(detection_output_sort_node const& node)
205 {
206     auto node_info = node.desc_to_json();
207     auto desc = node.get_primitive();
208
209     auto& input_bboxes = node.input();
210
211     std::stringstream primitive_description;
212
213     json_composite detec_out_info;
214     detec_out_info.add("input bboxes id", input_bboxes.id());
215     detec_out_info.add("num_classes:", desc->num_images);
216     detec_out_info.add("num_classes:", desc->num_classes);
217     detec_out_info.add("keep_top_k", desc->keep_top_k);
218     detec_out_info.add("share_location", desc->share_location);
219     detec_out_info.add("top_k", desc->top_k);
220     detec_out_info.dump(primitive_description);
221
222     node_info->add("dection output info", detec_out_info);
223     node_info->dump(primitive_description);
224
225     return primitive_description.str();
226 }
227
228 detection_output_sort_inst::typed_primitive_inst(network_impl& network, detection_output_sort_node const& node)
229     :parent(network, node)
230 {
231     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input memory format", node.get_dependency(0).get_output_layout().format.value, "expected bfyx input format", format::bfyx);
232
233     CLDNN_ERROR_BOOL(node.id(), "Detecion output layer padding", node.is_padded(), "Detection output layer doesn't support output padding.");
234 }
235 }