modules/dnn/src/darknet/darknet_io.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //                        (3-clause BSD License)
  13 //
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 // * Redistributions of source code must retain the above copyright notice,
  21 // this list of conditions and the following disclaimer.
  22 //
  23 // * Redistributions in binary form must reproduce the above copyright notice,
  24 // this list of conditions and the following disclaimer in the documentation
  25 // and/or other materials provided with the distribution.
  26 //
  27 // * Neither the names of the copyright holders nor the names of the contributors
  28 // may be used to endorse or promote products derived from this software
  29 // without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall copyright holders or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 /*M///////////////////////////////////////////////////////////////////////////////////////
  45 //MIT License
  46 //
  47 //Copyright (c) 2017 Joseph Redmon
  48 //
  49 //Permission is hereby granted, free of charge, to any person obtaining a copy
  50 //of this software and associated documentation files (the "Software"), to deal
  51 //in the Software without restriction, including without limitation the rights
  52 //to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  53 //copies of the Software, and to permit persons to whom the Software is
  54 //furnished to do so, subject to the following conditions:
  55 //
  56 //The above copyright notice and this permission notice shall be included in all
  57 //copies or substantial portions of the Software.
  58 //
  59 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  60 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  61 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  62 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  63 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  64 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  65 //SOFTWARE.
  66 //
  67 //M*/
  68
  69 #include "../precomp.hpp"
  70
  71 #include <iostream>
  72 #include <fstream>
  73 #include <sstream>
  74
  75 #include "darknet_io.hpp"
  76
  77 namespace cv {
  78     namespace dnn {
  79         namespace darknet {
  80
  81             template<typename T>
  82             T getParam(const std::map<std::string, std::string> &params, const std::string param_name, T init_val)
  83             {
  84                 std::map<std::string, std::string>::const_iterator it = params.find(param_name);
  85                 if (it != params.end()) {
  86                     std::stringstream ss(it->second);
  87                     ss >> init_val;
  88                 }
  89                 return init_val;
  90             }
  91
            // Layer name that setLayersParams starts from as its "last layer" (so the
            // first appended layer uses it as bottom) and that setYolo adds as the
            // second bottom of every yolo layer.
            static const std::string kFirstLayerName = "data";
  93
  94             class setLayersParams {
  95
  96                 NetParameter *net;
  97                 int layer_id;
  98                 std::string last_layer;
  99                 std::vector<std::string> fused_layer_names;
 100
 101             public:
 102                 setLayersParams(NetParameter *_net) :
 103                     net(_net), layer_id(0), last_layer(kFirstLayerName)
 104                 {}
 105
 106                 void setLayerBlobs(int i, std::vector<cv::Mat> blobs)
 107                 {
 108                     cv::dnn::LayerParams &params = net->layers[i].layerParams;
 109                     params.blobs = blobs;
 110                 }
 111
 112                 cv::dnn::LayerParams getParamConvolution(int kernel, int pad,
 113                     int stride, int filters_num)
 114                 {
 115                     cv::dnn::LayerParams params;
 116                     params.name = "Convolution-name";
 117                     params.type = "Convolution";
 118
 119                     params.set<int>("kernel_size", kernel);
 120                     params.set<int>("pad", pad);
 121                     params.set<int>("stride", stride);
 122
 123                     params.set<bool>("bias_term", false);       // true only if(BatchNorm == false)
 124                     params.set<int>("num_output", filters_num);
 125
 126                     return params;
 127                 }
 128
 129
 130                 void setConvolution(int kernel, int pad, int stride,
 131                     int filters_num, int channels_num, int use_batch_normalize)
 132                 {
 133                     cv::dnn::LayerParams conv_param =
 134                         getParamConvolution(kernel, pad, stride, filters_num);
 135
 136                     darknet::LayerParameter lp;
 137                     std::string layer_name = cv::format("conv_%d", layer_id);
 138
 139                     // use BIAS in any case
 140                     if (!use_batch_normalize) {
 141                         conv_param.set<bool>("bias_term", true);
 142                     }
 143
 144                     lp.layer_name = layer_name;
 145                     lp.layer_type = conv_param.type;
 146                     lp.layerParams = conv_param;
 147                     lp.bottom_indexes.push_back(last_layer);
 148                     last_layer = layer_name;
 149                     net->layers.push_back(lp);
 150
 151                     if (use_batch_normalize)
 152                     {
 153                         cv::dnn::LayerParams bn_param;
 154
 155                         bn_param.name = "BatchNorm-name";
 156                         bn_param.type = "BatchNorm";
 157                         bn_param.set<bool>("has_weight", true);
 158                         bn_param.set<bool>("has_bias", true);
 159                         bn_param.set<float>("eps", 1E-6);       // .000001f in Darknet Yolo
 160
 161                         darknet::LayerParameter lp;
 162                         std::string layer_name = cv::format("bn_%d", layer_id);
 163                         lp.layer_name = layer_name;
 164                         lp.layer_type = bn_param.type;
 165                         lp.layerParams = bn_param;
 166                         lp.bottom_indexes.push_back(last_layer);
 167                         last_layer = layer_name;
 168                         net->layers.push_back(lp);
 169                     }
 170
 171                     layer_id++;
 172                     fused_layer_names.push_back(last_layer);
 173                 }
 174
 175                 void setReLU()
 176                 {
 177                     cv::dnn::LayerParams activation_param;
 178                     activation_param.set<float>("negative_slope", 0.1f);
 179                     activation_param.name = "ReLU-name";
 180                     activation_param.type = "ReLU";
 181
 182                     darknet::LayerParameter lp;
 183                     std::string layer_name = cv::format("relu_%d", layer_id);
 184                     lp.layer_name = layer_name;
 185                     lp.layer_type = activation_param.type;
 186                     lp.layerParams = activation_param;
 187                     lp.bottom_indexes.push_back(last_layer);
 188                     last_layer = layer_name;
 189                     net->layers.push_back(lp);
 190
 191                     fused_layer_names.back() = last_layer;
 192                 }
 193
 194                 void setMaxpool(size_t kernel, size_t pad, size_t stride)
 195                 {
 196                     cv::dnn::LayerParams maxpool_param;
 197                     maxpool_param.set<cv::String>("pool", "max");
 198                     maxpool_param.set<int>("kernel_size", kernel);
 199                     maxpool_param.set<int>("pad", pad);
 200                     maxpool_param.set<int>("stride", stride);
 201                     maxpool_param.set<cv::String>("pad_mode", "SAME");
 202                     maxpool_param.name = "Pooling-name";
 203                     maxpool_param.type = "Pooling";
 204                     darknet::LayerParameter lp;
 205
 206                     std::string layer_name = cv::format("pool_%d", layer_id);
 207                     lp.layer_name = layer_name;
 208                     lp.layer_type = maxpool_param.type;
 209                     lp.layerParams = maxpool_param;
 210                     lp.bottom_indexes.push_back(last_layer);
 211                     last_layer = layer_name;
 212                     net->layers.push_back(lp);
 213                     layer_id++;
 214                     fused_layer_names.push_back(last_layer);
 215                 }
 216
 217                 void setAvgpool()
 218                 {
 219                     cv::dnn::LayerParams avgpool_param;
 220                     avgpool_param.set<cv::String>("pool", "ave");
 221                     avgpool_param.set<bool>("global_pooling", true);
 222                     avgpool_param.name = "Pooling-name";
 223                     avgpool_param.type = "Pooling";
 224                     darknet::LayerParameter lp;
 225
 226                     std::string layer_name = cv::format("avgpool_%d", layer_id);
 227                     lp.layer_name = layer_name;
 228                     lp.layer_type = avgpool_param.type;
 229                     lp.layerParams = avgpool_param;
 230                     lp.bottom_indexes.push_back(last_layer);
 231                     last_layer = layer_name;
 232                     net->layers.push_back(lp);
 233                     layer_id++;
 234                     fused_layer_names.push_back(last_layer);
 235                 }
 236
 237                 void setSoftmax()
 238                 {
 239                     cv::dnn::LayerParams softmax_param;
 240                     softmax_param.name = "Softmax-name";
 241                     softmax_param.type = "Softmax";
 242                     darknet::LayerParameter lp;
 243
 244                     std::string layer_name = cv::format("softmax_%d", layer_id);
 245                     lp.layer_name = layer_name;
 246                     lp.layer_type = softmax_param.type;
 247                     lp.layerParams = softmax_param;
 248                     lp.bottom_indexes.push_back(last_layer);
 249                     last_layer = layer_name;
 250                     net->layers.push_back(lp);
 251                     layer_id++;
 252                     fused_layer_names.push_back(last_layer);
 253                 }
 254
 255                 void setConcat(int number_of_inputs, int *input_indexes)
 256                 {
 257                     cv::dnn::LayerParams concat_param;
 258                     concat_param.name = "Concat-name";
 259                     concat_param.type = "Concat";
 260                     concat_param.set<int>("axis", 1);   // channels are in axis = 1
 261
 262                     darknet::LayerParameter lp;
 263
 264                     std::string layer_name = cv::format("concat_%d", layer_id);
 265                     lp.layer_name = layer_name;
 266                     lp.layer_type = concat_param.type;
 267                     lp.layerParams = concat_param;
 268                     for (int i = 0; i < number_of_inputs; ++i)
 269                         lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i]));
 270
 271                     last_layer = layer_name;
 272                     net->layers.push_back(lp);
 273
 274                     layer_id++;
 275                     fused_layer_names.push_back(last_layer);
 276                 }
 277
 278                 void setIdentity(int bottom_index)
 279                 {
 280                     cv::dnn::LayerParams identity_param;
 281                     identity_param.name = "Identity-name";
 282                     identity_param.type = "Identity";
 283
 284                     darknet::LayerParameter lp;
 285
 286                     std::string layer_name = cv::format("identity_%d", layer_id);
 287                     lp.layer_name = layer_name;
 288                     lp.layer_type = identity_param.type;
 289                     lp.layerParams = identity_param;
 290                     lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index));
 291
 292                     last_layer = layer_name;
 293                     net->layers.push_back(lp);
 294
 295                     layer_id++;
 296                     fused_layer_names.push_back(last_layer);
 297                 }
 298
 299                 void setReorg(int stride)
 300                 {
 301                     cv::dnn::LayerParams reorg_params;
 302                     reorg_params.name = "Reorg-name";
 303                     reorg_params.type = "Reorg";
 304                     reorg_params.set<int>("reorg_stride", stride);
 305
 306                     darknet::LayerParameter lp;
 307                     std::string layer_name = cv::format("reorg_%d", layer_id);
 308                     lp.layer_name = layer_name;
 309                     lp.layer_type = reorg_params.type;
 310                     lp.layerParams = reorg_params;
 311                     lp.bottom_indexes.push_back(last_layer);
 312                     last_layer = layer_name;
 313
 314                     net->layers.push_back(lp);
 315
 316                     layer_id++;
 317                     fused_layer_names.push_back(last_layer);
 318                 }
 319
 320                 void setPermute(bool isDarknetLayer = true)
 321                 {
 322                     cv::dnn::LayerParams permute_params;
 323                     permute_params.name = "Permute-name";
 324                     permute_params.type = "Permute";
 325                     int permute[] = { 0, 2, 3, 1 };
 326                     cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4);
 327
 328                     permute_params.set("order", paramOrder);
 329
 330                     darknet::LayerParameter lp;
 331                     std::string layer_name = cv::format("permute_%d", layer_id);
 332                     lp.layer_name = layer_name;
 333                     lp.layer_type = permute_params.type;
 334                     lp.layerParams = permute_params;
 335                     lp.bottom_indexes.push_back(last_layer);
 336                     last_layer = layer_name;
 337                     net->layers.push_back(lp);
 338
 339                     if (isDarknetLayer)
 340                     {
 341                         layer_id++;
 342                         fused_layer_names.push_back(last_layer);
 343                     }
 344                 }
 345
 346                 void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData)
 347                 {
 348                     cv::dnn::LayerParams region_param;
 349                     region_param.name = "Region-name";
 350                     region_param.type = "Region";
 351
 352                     region_param.set<float>("thresh", thresh);
 353                     region_param.set<int>("coords", coords);
 354                     region_param.set<int>("classes", classes);
 355                     region_param.set<int>("anchors", anchors);
 356                     region_param.set<int>("classfix", classfix);
 357                     region_param.set<bool>("softmax_tree", softmax_tree);
 358                     region_param.set<bool>("softmax", softmax);
 359
 360                     cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone();
 361                     region_param.blobs.push_back(biasData_mat);
 362
 363                     darknet::LayerParameter lp;
 364                     std::string layer_name = "detection_out";
 365                     lp.layer_name = layer_name;
 366                     lp.layer_type = region_param.type;
 367                     lp.layerParams = region_param;
 368                     lp.bottom_indexes.push_back(last_layer);
 369                     last_layer = layer_name;
 370                     net->layers.push_back(lp);
 371
 372                     layer_id++;
 373                     fused_layer_names.push_back(last_layer);
 374                 }
 375
 376                 void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold)
 377                 {
 378                     cv::dnn::LayerParams region_param;
 379                     region_param.name = "Region-name";
 380                     region_param.type = "Region";
 381
 382                     const int numAnchors = mask.size();
 383
 384                     region_param.set<int>("classes", classes);
 385                     region_param.set<int>("anchors", numAnchors);
 386                     region_param.set<bool>("logistic", true);
 387                     region_param.set<float>("thresh", thresh);
 388                     region_param.set<float>("nms_threshold", nms_threshold);
 389
 390                     std::vector<float> usedAnchors(numAnchors * 2);
 391                     for (int i = 0; i < numAnchors; ++i)
 392                     {
 393                         usedAnchors[i * 2] = anchors[mask[i] * 2];
 394                         usedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1];
 395                     }
 396
 397                     cv::Mat biasData_mat = cv::Mat(1, numAnchors * 2, CV_32F, &usedAnchors[0]).clone();
 398                     region_param.blobs.push_back(biasData_mat);
 399
 400                     darknet::LayerParameter lp;
 401                     std::string layer_name = cv::format("yolo_%d", layer_id);
 402                     lp.layer_name = layer_name;
 403                     lp.layer_type = region_param.type;
 404                     lp.layerParams = region_param;
 405                     lp.bottom_indexes.push_back(last_layer);
 406                     lp.bottom_indexes.push_back(kFirstLayerName);
 407                     last_layer = layer_name;
 408                     net->layers.push_back(lp);
 409
 410                     layer_id++;
 411                     fused_layer_names.push_back(last_layer);
 412                 }
 413
 414                 void setShortcut(int from, float alpha)
 415                 {
 416                     cv::dnn::LayerParams shortcut_param;
 417                     shortcut_param.name = "Shortcut-name";
 418                     shortcut_param.type = "Eltwise";
 419
 420                     if (alpha != 1)
 421                     {
 422                         std::vector<float> coeffs(2, 1);
 423                         coeffs[0] = alpha;
 424                         shortcut_param.set("coeff", DictValue::arrayReal<float*>(&coeffs[0], coeffs.size()));
 425                     }
 426
 427                     shortcut_param.set<std::string>("op", "sum");
 428
 429                     darknet::LayerParameter lp;
 430                     std::string layer_name = cv::format("shortcut_%d", layer_id);
 431                     lp.layer_name = layer_name;
 432                     lp.layer_type = shortcut_param.type;
 433                     lp.layerParams = shortcut_param;
 434                     lp.bottom_indexes.push_back(last_layer);
 435                     lp.bottom_indexes.push_back(fused_layer_names.at(from));
 436                     last_layer = layer_name;
 437                     net->layers.push_back(lp);
 438
 439                     layer_id++;
 440                     fused_layer_names.push_back(last_layer);
 441                 }
 442
 443                 void setUpsample(int scaleFactor)
 444                 {
 445                     cv::dnn::LayerParams param;
 446                     param.name = "Upsample-name";
 447                     param.type = "Resize";
 448
 449                     param.set<int>("zoom_factor", scaleFactor);
 450                     param.set<String>("interpolation", "nearest");
 451
 452                     darknet::LayerParameter lp;
 453                     std::string layer_name = cv::format("upsample_%d", layer_id);
 454                     lp.layer_name = layer_name;
 455                     lp.layer_type = param.type;
 456                     lp.layerParams = param;
 457                     lp.bottom_indexes.push_back(last_layer);
 458                     last_layer = layer_name;
 459                     net->layers.push_back(lp);
 460
 461                     layer_id++;
 462                     fused_layer_names.push_back(last_layer);
 463                 }
 464             };
 465
 466             std::string escapeString(const std::string &src)
 467             {
 468                 std::string dst;
 469                 for (size_t i = 0; i < src.size(); ++i)
 470                     if (src[i] > ' ' && src[i] <= 'z')
 471                         dst += src[i];
 472                 return dst;
 473             }
 474
 475             template<typename T>
 476             std::vector<T> getNumbers(const std::string &src)
 477             {
 478                 std::vector<T> dst;
 479                 std::stringstream ss(src);
 480
 481                 for (std::string str; std::getline(ss, str, ',');) {
 482                     std::stringstream line(str);
 483                     T val;
 484                     line >> val;
 485                     dst.push_back(val);
 486                 }
 487                 return dst;
 488             }
 489
 490             bool ReadDarknetFromCfgStream(std::istream &ifile, NetParameter *net)
 491             {
 492                 bool read_net = false;
 493                 int layers_counter = -1;
 494                 for (std::string line; std::getline(ifile, line);) {
 495                     line = escapeString(line);
 496                     if (line.empty()) continue;
 497                     switch (line[0]) {
 498                     case '\0': break;
 499                     case '#': break;
 500                     case ';': break;
 501                     case '[':
 502                         if (line == "[net]") {
 503                             read_net = true;
 504                         }
 505                         else {
 506                             // read section
 507                             read_net = false;
 508                             ++layers_counter;
 509                             const size_t layer_type_size = line.find("]") - 1;
 510                             CV_Assert(layer_type_size < line.size());
 511                             std::string layer_type = line.substr(1, layer_type_size);
 512                             net->layers_cfg[layers_counter]["type"] = layer_type;
 513                         }
 514                         break;
 515                     default:
 516                         // read entry
 517                         const size_t separator_index = line.find('=');
 518                         CV_Assert(separator_index < line.size());
 519                         if (separator_index != std::string::npos) {
 520                             std::string name = line.substr(0, separator_index);
 521                             std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1));
 522                             name = escapeString(name);
 523                             value = escapeString(value);
 524                             if (name.empty() || value.empty()) continue;
 525                             if (read_net)
 526                                 net->net_cfg[name] = value;
 527                             else
 528                                 net->layers_cfg[layers_counter][name] = value;
 529                         }
 530                     }
 531                 }
 532
 533                 std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"];
 534                 std::vector<float> vec = getNumbers<float>(anchors);
 535                 std::map<std::string, std::string> &net_params = net->net_cfg;
 536                 net->width = getParam(net_params, "width", 416);
 537                 net->height = getParam(net_params, "height", 416);
 538                 net->channels = getParam(net_params, "channels", 3);
 539                 CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);
 540
 541                 int current_channels = net->channels;
 542                 net->out_channels_vec.resize(net->layers_cfg.size());
 543
 544                 layers_counter = -1;
 545
 546                 setLayersParams setParams(net);
 547
 548                 typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
 549                 for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
 550                     ++layers_counter;
 551                     std::map<std::string, std::string> &layer_params = i->second;
 552                     std::string layer_type = layer_params["type"];
 553
 554                     if (layer_type == "convolutional")
 555                     {
 556                         int kernel_size = getParam<int>(layer_params, "size", -1);
 557                         int pad = getParam<int>(layer_params, "pad", 0);
 558                         int stride = getParam<int>(layer_params, "stride", 1);
 559                         int filters = getParam<int>(layer_params, "filters", -1);
 560                         bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
 561                         int flipped = getParam<int>(layer_params, "flipped", 0);
 562                         if (flipped == 1)
 563                             CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
 564
 565                         // correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file
 566                         if (kernel_size < 3) pad = 0;
 567
 568                         CV_Assert(kernel_size > 0 && filters > 0);
 569                         CV_Assert(current_channels > 0);
 570
 571                         setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
 572                             batch_normalize);
 573
 574                         current_channels = filters;
 575                     }
 576                     else if (layer_type == "maxpool")
 577                     {
 578                         int kernel_size = getParam<int>(layer_params, "size", 2);
 579                         int stride = getParam<int>(layer_params, "stride", 2);
 580                         int pad = getParam<int>(layer_params, "pad", 0);
 581                         setParams.setMaxpool(kernel_size, pad, stride);
 582                     }
 583                     else if (layer_type == "avgpool")
 584                     {
 585                         setParams.setAvgpool();
 586                     }
 587                     else if (layer_type == "softmax")
 588                     {
 589                         int groups = getParam<int>(layer_params, "groups", 1);
 590                         if (groups != 1)
 591                             CV_Error(Error::StsNotImplemented, "Softmax from Darknet with groups != 1");
 592                         setParams.setSoftmax();
 593                     }
 594                     else if (layer_type == "route")
 595                     {
 596                         std::string bottom_layers = getParam<std::string>(layer_params, "layers", "");
 597                         CV_Assert(!bottom_layers.empty());
 598                         std::vector<int> layers_vec = getNumbers<int>(bottom_layers);
 599
 600                         current_channels = 0;
 601                         for (size_t k = 0; k < layers_vec.size(); ++k) {
 602                             layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
 603                             current_channels += net->out_channels_vec[layers_vec[k]];
 604                         }
 605
 606                         if (layers_vec.size() == 1)
 607                             setParams.setIdentity(layers_vec.at(0));
 608                         else
 609                             setParams.setConcat(layers_vec.size(), layers_vec.data());
 610                     }
 611                     else if (layer_type == "reorg")
 612                     {
 613                         int stride = getParam<int>(layer_params, "stride", 2);
 614                         current_channels = current_channels * (stride*stride);
 615
 616                         setParams.setReorg(stride);
 617                     }
 618                     else if (layer_type == "region")
 619                     {
 620                         float thresh = getParam<float>(layer_params, "thresh", 0.001);
 621                         int coords = getParam<int>(layer_params, "coords", 4);
 622                         int classes = getParam<int>(layer_params, "classes", -1);
 623                         int num_of_anchors = getParam<int>(layer_params, "num", -1);
 624                         int classfix = getParam<int>(layer_params, "classfix", 0);
 625                         bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1);
 626                         bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0);
 627
 628                         std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
 629                         CV_Assert(!anchors_values.empty());
 630                         std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
 631
 632                         CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
 633
 634                         setParams.setPermute(false);
 635                         setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data());
 636                     }
 637                     else if (layer_type == "shortcut")
 638                     {
 639                         std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
 640                         float alpha = getParam<float>(layer_params, "alpha", 1);
 641                         float beta = getParam<float>(layer_params, "beta", 0);
 642                         if (beta != 0)
 643                             CV_Error(Error::StsNotImplemented, "Non-zero beta");
 644                         CV_Assert(!bottom_layer.empty());
 645                         int from = std::atoi(bottom_layer.c_str());
 646
 647                         from = from < 0 ? from + layers_counter : from;
 648                         setParams.setShortcut(from, alpha);
 649                     }
 650                     else if (layer_type == "upsample")
 651                     {
 652                         int scaleFactor = getParam<int>(layer_params, "stride", 1);
 653                         setParams.setUpsample(scaleFactor);
 654                     }
 655                     else if (layer_type == "yolo")
 656                     {
 657                         int classes = getParam<int>(layer_params, "classes", -1);
 658                         int num_of_anchors = getParam<int>(layer_params, "num", -1);
 659                         float thresh = getParam<float>(layer_params, "thresh", 0.2);
 660                         float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.4);
 661
 662                         std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
 663                         CV_Assert(!anchors_values.empty());
 664                         std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
 665
 666                         std::string mask_values = getParam<std::string>(layer_params, "mask", std::string());
 667                         CV_Assert(!mask_values.empty());
 668                         std::vector<int> mask_vec = getNumbers<int>(mask_values);
 669
 670                         CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
 671
 672                         setParams.setPermute(false);
 673                         setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold);
 674                     }
 675                     else {
 676                         CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
 677                     }
 678
 679                     std::string activation = getParam<std::string>(layer_params, "activation", "linear");
 680                     if (activation == "leaky")
 681                     {
 682                         setParams.setReLU();
 683                     }
 684                     else if (activation != "linear")
 685                         CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
 686
 687                     net->out_channels_vec[layers_counter] = current_channels;
 688                 }
 689
 690                 return true;
 691             }
 692
 693             bool ReadDarknetFromWeightsStream(std::istream &ifile, NetParameter *net)
 694             {
 695                 int32_t major_ver, minor_ver, revision;
 696                 ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t));
 697                 ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t));
 698                 ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t));
 699
 700                 uint64_t seen;
 701                 if ((major_ver * 10 + minor_ver) >= 2) {
 702                     ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t));
 703                 }
 704                 else {
 705                     int32_t iseen = 0;
 706                     ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t));
 707                     seen = iseen;
 708                 }
 709                 bool transpose = (major_ver > 1000) || (minor_ver > 1000);
 710                 if(transpose)
 711                     CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented");
 712
 713                 int current_channels = net->channels;
 714                 int cv_layers_counter = -1;
 715                 int darknet_layers_counter = -1;
 716
 717                 setLayersParams setParams(net);
 718
 719                 typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
 720                 for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
 721                     ++darknet_layers_counter;
 722                     ++cv_layers_counter;
 723                     std::map<std::string, std::string> &layer_params = i->second;
 724                     std::string layer_type = layer_params["type"];
 725
 726                     if (layer_type == "convolutional")
 727                     {
 728                         int kernel_size = getParam<int>(layer_params, "size", -1);
 729                         int filters = getParam<int>(layer_params, "filters", -1);
 730                         bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
 731
 732                         CV_Assert(kernel_size > 0 && filters > 0);
 733                         CV_Assert(current_channels > 0);
 734
 735                         size_t const weights_size = filters * current_channels * kernel_size * kernel_size;
 736                         int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size };
 737                         cv::Mat weightsBlob;
 738                         weightsBlob.create(4, sizes_weights, CV_32F);
 739                         CV_Assert(weightsBlob.isContinuous());
 740
 741                         cv::Mat meanData_mat(1, filters, CV_32F);       // mean
 742                         cv::Mat stdData_mat(1, filters, CV_32F);        // variance
 743                         cv::Mat weightsData_mat(1, filters, CV_32F);// scale
 744                         cv::Mat biasData_mat(1, filters, CV_32F);       // bias
 745
 746                         ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters);
 747                         if (use_batch_normalize) {
 748                             ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters);
 749                             ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters);
 750                             ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters);
 751                         }
 752                         ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);
 753
 754                         // set convolutional weights
 755                         std::vector<cv::Mat> conv_blobs;
 756                         conv_blobs.push_back(weightsBlob);
 757                         if (!use_batch_normalize) {
 758                             // use BIAS in any case
 759                             conv_blobs.push_back(biasData_mat);
 760                         }
 761                         setParams.setLayerBlobs(cv_layers_counter, conv_blobs);
 762
 763                         // set batch normalize (mean, variance, scale, bias)
 764                         if (use_batch_normalize) {
 765                             ++cv_layers_counter;
 766                             std::vector<cv::Mat> bn_blobs;
 767                             bn_blobs.push_back(meanData_mat);
 768                             bn_blobs.push_back(stdData_mat);
 769                             bn_blobs.push_back(weightsData_mat);
 770                             bn_blobs.push_back(biasData_mat);
 771                             setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
 772                         }
 773                     }
 774                     if (layer_type == "region" || layer_type == "yolo")
 775                     {
 776                         ++cv_layers_counter;  // For permute.
 777                     }
 778
 779                     std::string activation = getParam<std::string>(layer_params, "activation", "linear");
 780                     if(activation == "leaky")
 781                         ++cv_layers_counter;  // For ReLU
 782
 783                     current_channels = net->out_channels_vec[darknet_layers_counter];
 784                 }
 785                 return true;
 786             }
 787
 788         }
 789
 790
 791         void ReadNetParamsFromCfgStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
 792         {
 793             if (!darknet::ReadDarknetFromCfgStream(ifile, net)) {
 794                 CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
 795             }
 796         }
 797
 798         void ReadNetParamsFromBinaryStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
 799         {
 800             if (!darknet::ReadDarknetFromWeightsStream(ifile, net)) {
 801                 CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
 802             }
 803         }
 804     }
 805 }