inference-engine/thirdparty/clDNN/src/prior_box.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "prior_box_inst.h"
  18 #include "primitive_type_base.h"
  19 #include "error_handler.h"
  20 #include "json_object.h"
  21
  22 #include <cmath>
  23
  24 namespace cldnn
  25 {
  26 primitive_type_id prior_box_type_id()
  27 {
  28     static primitive_type_base<prior_box> instance;
  29     return &instance;
  30 }
  31
  32 namespace {
  33     template<typename dtype>
  34     void calculate_prior_box_output(memory_impl& output_mem, layout const& input_layout, prior_box& argument)
  35     {
  36         // Calculate output.
  37         // All the inputs for this layer are known at this point,
  38         // so the output buffer is written here and not in execute().
  39
  40         const int layer_width = input_layout.size.spatial[0];
  41         const int layer_height = input_layout.size.spatial[1];
  42         const int img_width = argument.img_size.spatial[0];
  43         const int img_height = argument.img_size.spatial[1];
  44         float step_w = argument.step_width;
  45         float step_h = argument.step_height;
  46         if (step_w == 0 || step_h == 0) {
  47             step_w = static_cast<float>(img_width) / layer_width;
  48             step_h = static_cast<float>(img_height) / layer_height;
  49         }
  50         const float offset = argument.offset;
  51         int num_priors = argument.scale_all_sizes ? (int)argument.aspect_ratios.size() * (int)argument.min_sizes.size()
  52                                                                                        + (int)argument.max_sizes.size()
  53                                                   : (int)argument.aspect_ratios.size() + (int)argument.min_sizes.size()
  54                                                                                        + (int)argument.max_sizes.size() - 1;
  55
  56         mem_lock<dtype> lock{ output_mem };
  57         auto out_ptr = lock.begin();
  58
  59         int dim = layer_height * layer_width * num_priors * 4;
  60         int idx = 0;
  61         for (int h = 0; h < layer_height; ++h) {
  62             for (int w = 0; w < layer_width; ++w) {
  63                 float center_x = (w + offset) * step_w;
  64                 float center_y = (h + offset) * step_h;
  65                 float box_width, box_height;
  66                 for (size_t s = 0; s < argument.min_sizes.size(); ++s) {
  67                     float min_size = argument.min_sizes[s];
  68                     // first prior: aspect_ratio = 1, size = min_size
  69                     box_width = box_height = min_size;
  70                     // xmin
  71                     out_ptr[idx++] = (dtype)((center_x - box_width / 2.f) / img_width);
  72                     // ymin
  73                     out_ptr[idx++] = (dtype)((center_y - box_height / 2.f) / img_height);
  74                     // xmax
  75                     out_ptr[idx++] = (dtype)((center_x + box_width / 2.f) / img_width);
  76                     // ymax
  77                     out_ptr[idx++] = (dtype)((center_y + box_height / 2.f) / img_height);
  78
  79                     if (argument.max_sizes.size() > 0) {
  80                         float max_size_ = argument.max_sizes[s];
  81                         // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
  82                         box_width = box_height = sqrt(min_size * max_size_);
  83                         // xmin
  84                         out_ptr[idx++] = (dtype)((center_x - box_width / 2.f) / img_width);
  85                         // ymin
  86                         out_ptr[idx++] = (dtype)((center_y - box_height / 2.f) / img_height);
  87                         // xmax
  88                         out_ptr[idx++] = (dtype)((center_x + box_width / 2.f) / img_width);
  89                         // ymax
  90                         out_ptr[idx++] = (dtype)((center_y + box_height / 2.f) / img_height);
  91                     }
  92
  93                     if (argument.scale_all_sizes || (!argument.scale_all_sizes && (s == argument.min_sizes.size() - 1)))
  94                     {
  95                         min_size = argument.scale_all_sizes ? argument.min_sizes[s] : argument.min_sizes[0];
  96                         // rest of priors
  97                         for (size_t r = 0; r < argument.aspect_ratios.size(); ++r)
  98                         {
  99                             float ar = argument.aspect_ratios[r];
 100                             if (fabs(ar - 1.) < 1e-6)
 101                             {
 102                                 continue;
 103                             }
 104                             box_width = min_size * sqrt(ar);
 105                             box_height = min_size / sqrt(ar);
 106                             // xmin
 107                             out_ptr[idx++] = (dtype) ((center_x - box_width / 2.f) / img_width);
 108                             // ymin
 109                             out_ptr[idx++] = (dtype) ((center_y - box_height / 2.f) / img_height);
 110                             // xmax
 111                             out_ptr[idx++] = (dtype) ((center_x + box_width / 2.f) / img_width);
 112                             // ymax
 113                             out_ptr[idx++] = (dtype) ((center_y + box_height / 2.f) / img_height);
 114                         }
 115                     }
 116                 }
 117             }
 118         }
 119
 120         // clip the prior's coordinate such that it is within [0, 1]
 121         if (argument.clip) {
 122             for (int d = 0; d < dim; ++d) {
 123                 out_ptr[d] = (dtype)std::min(std::max((float)out_ptr[d], 0.f), 1.f);
 124             }
 125         }
 126
 127         // set the variance.
 128         int count = output_mem.get_layout().size.spatial[0] * output_mem.get_layout().size.spatial[1];
 129         for (int h = 0; h < layer_height; ++h) {
 130             for (int w = 0; w < layer_width; ++w) {
 131                 for (int i = 0; i < num_priors; ++i) {
 132                     for (int j = 0; j < 4; ++j) {
 133                         out_ptr[count] = (dtype)((argument.variance.size() == 1) ? argument.variance[0] : argument.variance[j]);
 134                         ++count;
 135                     }
 136                 }
 137             }
 138         }
 139     }
 140 }
 141
 142 prior_box_node::typed_program_node(std::shared_ptr<prior_box> prim, program_impl& prog)
 143     : parent(prim, prog)
 144 {
 145     constant = true;
 146 }
 147
 148 void prior_box_node::calc_result()
 149 {
 150     if (result != nullptr)
 151         return;
 152
 153     auto& argument = *typed_desc();
 154
 155     //Check arguments
 156     CLDNN_ERROR_LESS_OR_EQUAL_THAN(id(), "Argument min size", argument.min_sizes.size(), "not proper size", 0, "Must provide at least one min size.");
 157
 158     for (size_t i = 0; i < argument.min_sizes.size(); i++) {
 159         CLDNN_ERROR_LESS_OR_EQUAL_THAN(id(), "Min size value at index: " + std::to_string(i), argument.min_sizes[i], "less or equal than 0", 0, "Min size must be positive.");
 160     }
 161     if (argument.max_sizes.size() > 0) {
 162         CLDNN_ERROR_NOT_EQUAL(id(), "Argument min sizes", argument.min_sizes.size(), "argument max sizes", argument.max_sizes.size(), "Number of min sizes must be equal to number of max sizes.");
 163     }
 164     for (size_t i = 0; i < argument.max_sizes.size(); i++) {
 165         CLDNN_ERROR_GREATER_OR_EQUAL_THAN(id(), "Argument min size value", argument.min_sizes[i], "argument max sizes value", argument.max_sizes[i], "Max size must be greater than Min size.");
 166     }
 167     if (argument.variance.size() > 1) {
 168         CLDNN_ERROR_NOT_EQUAL(id(), "Argument variance size", argument.variance.size(), "not proper size", 4, "Must provide 4 variances.");
 169         for (size_t i = 0; i < argument.variance.size(); i++) {
 170             CLDNN_ERROR_LESS_OR_EQUAL_THAN(id(), "Varaiance value at index: " + std::to_string(i), argument.variance[i], "value", 0, "Variance must be positive.");
 171         }
 172     }
 173     else if (argument.variance.size() == 1) {
 174         CLDNN_ERROR_LESS_OR_EQUAL_THAN(id(), "Varaiance value at index 0", argument.variance[0], "value", 0, "Variance must be positive.");
 175     }
 176
 177     CLDNN_ERROR_LESS_OR_EQUAL_THAN(id(), "Image dimension spatial X", argument.img_size.spatial[0], "value", 0, "Image spatial X must be positive.");
 178     CLDNN_ERROR_LESS_OR_EQUAL_THAN(id(), "Image dimension spatial Y", argument.img_size.spatial[1], "value", 0, "Image spatial Y must be positive.");
 179
 180     CLDNN_ERROR_LESS_THAN(id(), "Step height", argument.step_height, "value", 0, "Step height must be positive.");
 181     CLDNN_ERROR_LESS_THAN(id(), "Step width", argument.step_width, "value", 0, "Step width must be positive.");
 182
 183     CLDNN_ERROR_BOOL(id(), "Prior box padding", is_padded(), "Prior-box layer doesn't support output padding.");
 184
 185     //allocate storage
 186     result = get_program().get_engine().allocate_memory(get_output_layout());
 187
 188     //perform calculations
 189     if (input().get_output_layout().data_type == data_types::f16)
 190         calculate_prior_box_output<data_type_to_type<data_types::f16>::type>(*result, input().get_output_layout(), *typed_desc());
 191     else
 192         calculate_prior_box_output<data_type_to_type<data_types::f32>::type>(*result, input().get_output_layout(), *typed_desc());
 193 }
 194
 195 layout prior_box_inst::calc_output_layout(prior_box_node const& node)
 196 {
 197     assert((bool)node.get_primitive()->output_data_type == false
 198            && "Output data type forcing is not supported for prior_box_node!");
 199     auto desc = node.get_primitive();
 200     auto input_layout = node.input().get_output_layout();
 201     assert(input_layout.size.spatial.size() == 2);
 202
 203     const int layer_width = input_layout.size.spatial[0];
 204     const int layer_height = input_layout.size.spatial[1];
 205
 206     int num_priors = desc->scale_all_sizes ? (int)desc->aspect_ratios.size() * (int)desc->min_sizes.size() + (int)desc->max_sizes.size()
 207                                            : (int)desc->aspect_ratios.size() + (int)desc->min_sizes.size() + (int)desc->max_sizes.size() - 1;
 208
 209     // Since all images in a batch has same height and width, we only need to
 210     // generate one set of priors which can be shared across all images.
 211     // 2 features. First feature stores the mean of each prior coordinate.
 212     // Second feature stores the variance of each prior coordinate.
 213
 214     auto output_data_type = input_layout.data_type == data_types::f16 ? data_types::f16 : data_types::f32;
 215     return{ output_data_type, cldnn::format::bfyx, cldnn::tensor( 1, 2, 1, layer_width * layer_height * num_priors * 4 ) };
 216 }
 217
 218 std::string vector_to_string(std::vector<float> vec)
 219 {
 220     std::stringstream result;
 221     for (size_t i = 0; i < vec.size(); i++)
 222         result << vec.at(i) << ", ";
 223     return result.str();
 224 }
 225
 226 std::string prior_box_inst::to_string(prior_box_node const& node)
 227 {
 228     auto desc            = node.get_primitive();
 229     auto flip            = desc->flip ? "true" : "false";
 230     auto clip            = desc->clip ? "true" : "false";
 231     auto scale_all_sizes = desc->scale_all_sizes ? "true" : "false";
 232     auto node_info       = node.desc_to_json();
 233
 234     std::string str_min_sizes    = vector_to_string(desc->min_sizes);
 235     std::string str_max_sizes    = vector_to_string(desc->max_sizes);
 236     std::string str_variance     = vector_to_string(desc->variance);
 237     std::string str_aspect_ratio = vector_to_string(desc->aspect_ratios);
 238
 239     std::stringstream primitive_description;
 240
 241     json_composite prior_info;
 242     prior_info.add("input id", node.input().id());
 243     prior_info.add("iamge size", desc->img_size);
 244     prior_info.add("variance", str_variance);
 245
 246     json_composite box_sizes_info;
 247     box_sizes_info.add("min sizes", str_min_sizes);
 248     box_sizes_info.add("max sizes", str_max_sizes);
 249     prior_info.add("box sizes", box_sizes_info);
 250
 251     prior_info.add("aspect_ratio", str_aspect_ratio);
 252     prior_info.add("flip", flip);
 253     prior_info.add("clip", clip);
 254     prior_info.add("scale all sizes", scale_all_sizes);
 255
 256     json_composite step_info;
 257     step_info.add("step width", desc->step_width);
 258     step_info.add("step height", desc->step_height);
 259     step_info.add("offset", desc->offset);
 260     prior_info.add("step", step_info);
 261
 262     node_info->add("prior box info", prior_info);
 263     node_info->dump(primitive_description);
 264
 265     return primitive_description.str();
 266 }
 267
 268 prior_box_inst::typed_primitive_inst(network_impl& network, prior_box_node const& node)
 269     :parent(network, node)
 270 {
 271     CLDNN_ERROR_MESSAGE(node.id(), "Prior box primitive instance should not be created!");
 272 }
 273
 274 }