3a90081e1784a4d07b19e8e53a6cac3fc6f060c3
[platform/upstream/opencv.git] / modules / dnn / src / darknet / darknet_io.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //                        (3-clause BSD License)
13 //
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistributions of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistributions in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * Neither the names of the copyright holders nor the names of the contributors
28 // may be used to endorse or promote products derived from this software
29 // without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall copyright holders or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 /*M///////////////////////////////////////////////////////////////////////////////////////
45 //MIT License
46 //
47 //Copyright (c) 2017 Joseph Redmon
48 //
49 //Permission is hereby granted, free of charge, to any person obtaining a copy
50 //of this software and associated documentation files (the "Software"), to deal
51 //in the Software without restriction, including without limitation the rights
52 //to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
53 //copies of the Software, and to permit persons to whom the Software is
54 //furnished to do so, subject to the following conditions:
55 //
56 //The above copyright notice and this permission notice shall be included in all
57 //copies or substantial portions of the Software.
58 //
59 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
60 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
61 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
62 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
63 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
64 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
65 //SOFTWARE.
66 //
67 //M*/
68
69 #include "../precomp.hpp"
70
71 #include <iostream>
72 #include <fstream>
73 #include <sstream>
74
75 #include "darknet_io.hpp"
76
77 namespace cv {
78     namespace dnn {
79         namespace darknet {
80
81             template<typename T>
82             T getParam(const std::map<std::string, std::string> &params, const std::string param_name, T init_val)
83             {
84                 std::map<std::string, std::string>::const_iterator it = params.find(param_name);
85                 if (it != params.end()) {
86                     std::stringstream ss(it->second);
87                     ss >> init_val;
88                 }
89                 return init_val;
90             }
91
92             static const std::string kFirstLayerName = "data";
93
94             class setLayersParams {
95
96                 NetParameter *net;
97                 int layer_id;
98                 std::string last_layer;
99                 std::vector<std::string> fused_layer_names;
100
101             public:
102                 setLayersParams(NetParameter *_net) :
103                     net(_net), layer_id(0), last_layer(kFirstLayerName)
104                 {}
105
106                 void setLayerBlobs(int i, std::vector<cv::Mat> blobs)
107                 {
108                     cv::dnn::LayerParams &params = net->layers[i].layerParams;
109                     params.blobs = blobs;
110                 }
111
112                 cv::dnn::LayerParams getParamConvolution(int kernel, int pad,
113                     int stride, int filters_num)
114                 {
115                     cv::dnn::LayerParams params;
116                     params.name = "Convolution-name";
117                     params.type = "Convolution";
118
119                     params.set<int>("kernel_size", kernel);
120                     params.set<int>("pad", pad);
121                     params.set<int>("stride", stride);
122
123                     params.set<bool>("bias_term", false);       // true only if(BatchNorm == false)
124                     params.set<int>("num_output", filters_num);
125
126                     return params;
127                 }
128
129
130                 void setConvolution(int kernel, int pad, int stride,
131                     int filters_num, int channels_num, int use_batch_normalize)
132                 {
133                     cv::dnn::LayerParams conv_param =
134                         getParamConvolution(kernel, pad, stride, filters_num);
135
136                     darknet::LayerParameter lp;
137                     std::string layer_name = cv::format("conv_%d", layer_id);
138
139                     // use BIAS in any case
140                     if (!use_batch_normalize) {
141                         conv_param.set<bool>("bias_term", true);
142                     }
143
144                     lp.layer_name = layer_name;
145                     lp.layer_type = conv_param.type;
146                     lp.layerParams = conv_param;
147                     lp.bottom_indexes.push_back(last_layer);
148                     last_layer = layer_name;
149                     net->layers.push_back(lp);
150
151                     if (use_batch_normalize)
152                     {
153                         cv::dnn::LayerParams bn_param;
154
155                         bn_param.name = "BatchNorm-name";
156                         bn_param.type = "BatchNorm";
157                         bn_param.set<bool>("has_weight", true);
158                         bn_param.set<bool>("has_bias", true);
159                         bn_param.set<float>("eps", 1E-6);       // .000001f in Darknet Yolo
160
161                         darknet::LayerParameter lp;
162                         std::string layer_name = cv::format("bn_%d", layer_id);
163                         lp.layer_name = layer_name;
164                         lp.layer_type = bn_param.type;
165                         lp.layerParams = bn_param;
166                         lp.bottom_indexes.push_back(last_layer);
167                         last_layer = layer_name;
168                         net->layers.push_back(lp);
169                     }
170
171                     layer_id++;
172                     fused_layer_names.push_back(last_layer);
173                 }
174
175                 void setReLU()
176                 {
177                     cv::dnn::LayerParams activation_param;
178                     activation_param.set<float>("negative_slope", 0.1f);
179                     activation_param.name = "ReLU-name";
180                     activation_param.type = "ReLU";
181
182                     darknet::LayerParameter lp;
183                     std::string layer_name = cv::format("relu_%d", layer_id);
184                     lp.layer_name = layer_name;
185                     lp.layer_type = activation_param.type;
186                     lp.layerParams = activation_param;
187                     lp.bottom_indexes.push_back(last_layer);
188                     last_layer = layer_name;
189                     net->layers.push_back(lp);
190
191                     fused_layer_names.back() = last_layer;
192                 }
193
194                 void setMaxpool(size_t kernel, size_t pad, size_t stride)
195                 {
196                     cv::dnn::LayerParams maxpool_param;
197                     maxpool_param.set<cv::String>("pool", "max");
198                     maxpool_param.set<int>("kernel_size", kernel);
199                     maxpool_param.set<int>("pad", pad);
200                     maxpool_param.set<int>("stride", stride);
201                     maxpool_param.set<cv::String>("pad_mode", "SAME");
202                     maxpool_param.name = "Pooling-name";
203                     maxpool_param.type = "Pooling";
204                     darknet::LayerParameter lp;
205
206                     std::string layer_name = cv::format("pool_%d", layer_id);
207                     lp.layer_name = layer_name;
208                     lp.layer_type = maxpool_param.type;
209                     lp.layerParams = maxpool_param;
210                     lp.bottom_indexes.push_back(last_layer);
211                     last_layer = layer_name;
212                     net->layers.push_back(lp);
213                     layer_id++;
214                     fused_layer_names.push_back(last_layer);
215                 }
216
217                 void setAvgpool()
218                 {
219                     cv::dnn::LayerParams avgpool_param;
220                     avgpool_param.set<cv::String>("pool", "ave");
221                     avgpool_param.set<bool>("global_pooling", true);
222                     avgpool_param.name = "Pooling-name";
223                     avgpool_param.type = "Pooling";
224                     darknet::LayerParameter lp;
225
226                     std::string layer_name = cv::format("avgpool_%d", layer_id);
227                     lp.layer_name = layer_name;
228                     lp.layer_type = avgpool_param.type;
229                     lp.layerParams = avgpool_param;
230                     lp.bottom_indexes.push_back(last_layer);
231                     last_layer = layer_name;
232                     net->layers.push_back(lp);
233                     layer_id++;
234                     fused_layer_names.push_back(last_layer);
235                 }
236
237                 void setSoftmax()
238                 {
239                     cv::dnn::LayerParams softmax_param;
240                     softmax_param.name = "Softmax-name";
241                     softmax_param.type = "Softmax";
242                     darknet::LayerParameter lp;
243
244                     std::string layer_name = cv::format("softmax_%d", layer_id);
245                     lp.layer_name = layer_name;
246                     lp.layer_type = softmax_param.type;
247                     lp.layerParams = softmax_param;
248                     lp.bottom_indexes.push_back(last_layer);
249                     last_layer = layer_name;
250                     net->layers.push_back(lp);
251                     layer_id++;
252                     fused_layer_names.push_back(last_layer);
253                 }
254
255                 void setConcat(int number_of_inputs, int *input_indexes)
256                 {
257                     cv::dnn::LayerParams concat_param;
258                     concat_param.name = "Concat-name";
259                     concat_param.type = "Concat";
260                     concat_param.set<int>("axis", 1);   // channels are in axis = 1
261
262                     darknet::LayerParameter lp;
263
264                     std::string layer_name = cv::format("concat_%d", layer_id);
265                     lp.layer_name = layer_name;
266                     lp.layer_type = concat_param.type;
267                     lp.layerParams = concat_param;
268                     for (int i = 0; i < number_of_inputs; ++i)
269                         lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i]));
270
271                     last_layer = layer_name;
272                     net->layers.push_back(lp);
273
274                     layer_id++;
275                     fused_layer_names.push_back(last_layer);
276                 }
277
278                 void setIdentity(int bottom_index)
279                 {
280                     cv::dnn::LayerParams identity_param;
281                     identity_param.name = "Identity-name";
282                     identity_param.type = "Identity";
283
284                     darknet::LayerParameter lp;
285
286                     std::string layer_name = cv::format("identity_%d", layer_id);
287                     lp.layer_name = layer_name;
288                     lp.layer_type = identity_param.type;
289                     lp.layerParams = identity_param;
290                     lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index));
291
292                     last_layer = layer_name;
293                     net->layers.push_back(lp);
294
295                     layer_id++;
296                     fused_layer_names.push_back(last_layer);
297                 }
298
299                 void setReorg(int stride)
300                 {
301                     cv::dnn::LayerParams reorg_params;
302                     reorg_params.name = "Reorg-name";
303                     reorg_params.type = "Reorg";
304                     reorg_params.set<int>("reorg_stride", stride);
305
306                     darknet::LayerParameter lp;
307                     std::string layer_name = cv::format("reorg_%d", layer_id);
308                     lp.layer_name = layer_name;
309                     lp.layer_type = reorg_params.type;
310                     lp.layerParams = reorg_params;
311                     lp.bottom_indexes.push_back(last_layer);
312                     last_layer = layer_name;
313
314                     net->layers.push_back(lp);
315
316                     layer_id++;
317                     fused_layer_names.push_back(last_layer);
318                 }
319
320                 void setPermute(bool isDarknetLayer = true)
321                 {
322                     cv::dnn::LayerParams permute_params;
323                     permute_params.name = "Permute-name";
324                     permute_params.type = "Permute";
325                     int permute[] = { 0, 2, 3, 1 };
326                     cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4);
327
328                     permute_params.set("order", paramOrder);
329
330                     darknet::LayerParameter lp;
331                     std::string layer_name = cv::format("permute_%d", layer_id);
332                     lp.layer_name = layer_name;
333                     lp.layer_type = permute_params.type;
334                     lp.layerParams = permute_params;
335                     lp.bottom_indexes.push_back(last_layer);
336                     last_layer = layer_name;
337                     net->layers.push_back(lp);
338
339                     if (isDarknetLayer)
340                     {
341                         layer_id++;
342                         fused_layer_names.push_back(last_layer);
343                     }
344                 }
345
346                 void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData)
347                 {
348                     cv::dnn::LayerParams region_param;
349                     region_param.name = "Region-name";
350                     region_param.type = "Region";
351
352                     region_param.set<float>("thresh", thresh);
353                     region_param.set<int>("coords", coords);
354                     region_param.set<int>("classes", classes);
355                     region_param.set<int>("anchors", anchors);
356                     region_param.set<int>("classfix", classfix);
357                     region_param.set<bool>("softmax_tree", softmax_tree);
358                     region_param.set<bool>("softmax", softmax);
359
360                     cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone();
361                     region_param.blobs.push_back(biasData_mat);
362
363                     darknet::LayerParameter lp;
364                     std::string layer_name = "detection_out";
365                     lp.layer_name = layer_name;
366                     lp.layer_type = region_param.type;
367                     lp.layerParams = region_param;
368                     lp.bottom_indexes.push_back(last_layer);
369                     last_layer = layer_name;
370                     net->layers.push_back(lp);
371
372                     layer_id++;
373                     fused_layer_names.push_back(last_layer);
374                 }
375
376                 void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold)
377                 {
378                     cv::dnn::LayerParams region_param;
379                     region_param.name = "Region-name";
380                     region_param.type = "Region";
381
382                     const int numAnchors = mask.size();
383
384                     region_param.set<int>("classes", classes);
385                     region_param.set<int>("anchors", numAnchors);
386                     region_param.set<bool>("logistic", true);
387                     region_param.set<float>("thresh", thresh);
388                     region_param.set<float>("nms_threshold", nms_threshold);
389
390                     std::vector<float> usedAnchors(numAnchors * 2);
391                     for (int i = 0; i < numAnchors; ++i)
392                     {
393                         usedAnchors[i * 2] = anchors[mask[i] * 2];
394                         usedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1];
395                     }
396
397                     cv::Mat biasData_mat = cv::Mat(1, numAnchors * 2, CV_32F, &usedAnchors[0]).clone();
398                     region_param.blobs.push_back(biasData_mat);
399
400                     darknet::LayerParameter lp;
401                     std::string layer_name = cv::format("yolo_%d", layer_id);
402                     lp.layer_name = layer_name;
403                     lp.layer_type = region_param.type;
404                     lp.layerParams = region_param;
405                     lp.bottom_indexes.push_back(last_layer);
406                     lp.bottom_indexes.push_back(kFirstLayerName);
407                     last_layer = layer_name;
408                     net->layers.push_back(lp);
409
410                     layer_id++;
411                     fused_layer_names.push_back(last_layer);
412                 }
413
414                 void setShortcut(int from, float alpha)
415                 {
416                     cv::dnn::LayerParams shortcut_param;
417                     shortcut_param.name = "Shortcut-name";
418                     shortcut_param.type = "Eltwise";
419
420                     if (alpha != 1)
421                     {
422                         std::vector<float> coeffs(2, 1);
423                         coeffs[0] = alpha;
424                         shortcut_param.set("coeff", DictValue::arrayReal<float*>(&coeffs[0], coeffs.size()));
425                     }
426
427                     shortcut_param.set<std::string>("op", "sum");
428
429                     darknet::LayerParameter lp;
430                     std::string layer_name = cv::format("shortcut_%d", layer_id);
431                     lp.layer_name = layer_name;
432                     lp.layer_type = shortcut_param.type;
433                     lp.layerParams = shortcut_param;
434                     lp.bottom_indexes.push_back(last_layer);
435                     lp.bottom_indexes.push_back(fused_layer_names.at(from));
436                     last_layer = layer_name;
437                     net->layers.push_back(lp);
438
439                     layer_id++;
440                     fused_layer_names.push_back(last_layer);
441                 }
442
443                 void setUpsample(int scaleFactor)
444                 {
445                     cv::dnn::LayerParams param;
446                     param.name = "Upsample-name";
447                     param.type = "Resize";
448
449                     param.set<int>("zoom_factor", scaleFactor);
450                     param.set<String>("interpolation", "nearest");
451
452                     darknet::LayerParameter lp;
453                     std::string layer_name = cv::format("upsample_%d", layer_id);
454                     lp.layer_name = layer_name;
455                     lp.layer_type = param.type;
456                     lp.layerParams = param;
457                     lp.bottom_indexes.push_back(last_layer);
458                     last_layer = layer_name;
459                     net->layers.push_back(lp);
460
461                     layer_id++;
462                     fused_layer_names.push_back(last_layer);
463                 }
464             };
465
466             std::string escapeString(const std::string &src)
467             {
468                 std::string dst;
469                 for (size_t i = 0; i < src.size(); ++i)
470                     if (src[i] > ' ' && src[i] <= 'z')
471                         dst += src[i];
472                 return dst;
473             }
474
475             template<typename T>
476             std::vector<T> getNumbers(const std::string &src)
477             {
478                 std::vector<T> dst;
479                 std::stringstream ss(src);
480
481                 for (std::string str; std::getline(ss, str, ',');) {
482                     std::stringstream line(str);
483                     T val;
484                     line >> val;
485                     dst.push_back(val);
486                 }
487                 return dst;
488             }
489
490             bool ReadDarknetFromCfgStream(std::istream &ifile, NetParameter *net)
491             {
492                 bool read_net = false;
493                 int layers_counter = -1;
494                 for (std::string line; std::getline(ifile, line);) {
495                     line = escapeString(line);
496                     if (line.empty()) continue;
497                     switch (line[0]) {
498                     case '\0': break;
499                     case '#': break;
500                     case ';': break;
501                     case '[':
502                         if (line == "[net]") {
503                             read_net = true;
504                         }
505                         else {
506                             // read section
507                             read_net = false;
508                             ++layers_counter;
509                             const size_t layer_type_size = line.find("]") - 1;
510                             CV_Assert(layer_type_size < line.size());
511                             std::string layer_type = line.substr(1, layer_type_size);
512                             net->layers_cfg[layers_counter]["type"] = layer_type;
513                         }
514                         break;
515                     default:
516                         // read entry
517                         const size_t separator_index = line.find('=');
518                         CV_Assert(separator_index < line.size());
519                         if (separator_index != std::string::npos) {
520                             std::string name = line.substr(0, separator_index);
521                             std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1));
522                             name = escapeString(name);
523                             value = escapeString(value);
524                             if (name.empty() || value.empty()) continue;
525                             if (read_net)
526                                 net->net_cfg[name] = value;
527                             else
528                                 net->layers_cfg[layers_counter][name] = value;
529                         }
530                     }
531                 }
532
533                 std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"];
534                 std::vector<float> vec = getNumbers<float>(anchors);
535                 std::map<std::string, std::string> &net_params = net->net_cfg;
536                 net->width = getParam(net_params, "width", 416);
537                 net->height = getParam(net_params, "height", 416);
538                 net->channels = getParam(net_params, "channels", 3);
539                 CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);
540
541                 int current_channels = net->channels;
542                 net->out_channels_vec.resize(net->layers_cfg.size());
543
544                 layers_counter = -1;
545
546                 setLayersParams setParams(net);
547
548                 typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
549                 for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
550                     ++layers_counter;
551                     std::map<std::string, std::string> &layer_params = i->second;
552                     std::string layer_type = layer_params["type"];
553
554                     if (layer_type == "convolutional")
555                     {
556                         int kernel_size = getParam<int>(layer_params, "size", -1);
557                         int pad = getParam<int>(layer_params, "pad", 0);
558                         int stride = getParam<int>(layer_params, "stride", 1);
559                         int filters = getParam<int>(layer_params, "filters", -1);
560                         bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
561                         int flipped = getParam<int>(layer_params, "flipped", 0);
562                         if (flipped == 1)
563                             CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
564
565                         // correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file
566                         if (kernel_size < 3) pad = 0;
567
568                         CV_Assert(kernel_size > 0 && filters > 0);
569                         CV_Assert(current_channels > 0);
570
571                         setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
572                             batch_normalize);
573
574                         current_channels = filters;
575                     }
576                     else if (layer_type == "maxpool")
577                     {
578                         int kernel_size = getParam<int>(layer_params, "size", 2);
579                         int stride = getParam<int>(layer_params, "stride", 2);
580                         int pad = getParam<int>(layer_params, "pad", 0);
581                         setParams.setMaxpool(kernel_size, pad, stride);
582                     }
583                     else if (layer_type == "avgpool")
584                     {
585                         setParams.setAvgpool();
586                     }
587                     else if (layer_type == "softmax")
588                     {
589                         int groups = getParam<int>(layer_params, "groups", 1);
590                         if (groups != 1)
591                             CV_Error(Error::StsNotImplemented, "Softmax from Darknet with groups != 1");
592                         setParams.setSoftmax();
593                     }
594                     else if (layer_type == "route")
595                     {
596                         std::string bottom_layers = getParam<std::string>(layer_params, "layers", "");
597                         CV_Assert(!bottom_layers.empty());
598                         std::vector<int> layers_vec = getNumbers<int>(bottom_layers);
599
600                         current_channels = 0;
601                         for (size_t k = 0; k < layers_vec.size(); ++k) {
602                             layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
603                             current_channels += net->out_channels_vec[layers_vec[k]];
604                         }
605
606                         if (layers_vec.size() == 1)
607                             setParams.setIdentity(layers_vec.at(0));
608                         else
609                             setParams.setConcat(layers_vec.size(), layers_vec.data());
610                     }
611                     else if (layer_type == "reorg")
612                     {
613                         int stride = getParam<int>(layer_params, "stride", 2);
614                         current_channels = current_channels * (stride*stride);
615
616                         setParams.setReorg(stride);
617                     }
618                     else if (layer_type == "region")
619                     {
620                         float thresh = getParam<float>(layer_params, "thresh", 0.001);
621                         int coords = getParam<int>(layer_params, "coords", 4);
622                         int classes = getParam<int>(layer_params, "classes", -1);
623                         int num_of_anchors = getParam<int>(layer_params, "num", -1);
624                         int classfix = getParam<int>(layer_params, "classfix", 0);
625                         bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1);
626                         bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0);
627
628                         std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
629                         CV_Assert(!anchors_values.empty());
630                         std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
631
632                         CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
633
634                         setParams.setPermute(false);
635                         setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data());
636                     }
637                     else if (layer_type == "shortcut")
638                     {
639                         std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
640                         float alpha = getParam<float>(layer_params, "alpha", 1);
641                         float beta = getParam<float>(layer_params, "beta", 0);
642                         if (beta != 0)
643                             CV_Error(Error::StsNotImplemented, "Non-zero beta");
644                         CV_Assert(!bottom_layer.empty());
645                         int from = std::atoi(bottom_layer.c_str());
646
647                         from = from < 0 ? from + layers_counter : from;
648                         setParams.setShortcut(from, alpha);
649                     }
650                     else if (layer_type == "upsample")
651                     {
652                         int scaleFactor = getParam<int>(layer_params, "stride", 1);
653                         setParams.setUpsample(scaleFactor);
654                     }
655                     else if (layer_type == "yolo")
656                     {
657                         int classes = getParam<int>(layer_params, "classes", -1);
658                         int num_of_anchors = getParam<int>(layer_params, "num", -1);
659                         float thresh = getParam<float>(layer_params, "thresh", 0.2);
660                         float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.4);
661
662                         std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
663                         CV_Assert(!anchors_values.empty());
664                         std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
665
666                         std::string mask_values = getParam<std::string>(layer_params, "mask", std::string());
667                         CV_Assert(!mask_values.empty());
668                         std::vector<int> mask_vec = getNumbers<int>(mask_values);
669
670                         CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
671
672                         setParams.setPermute(false);
673                         setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold);
674                     }
675                     else {
676                         CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
677                     }
678
679                     std::string activation = getParam<std::string>(layer_params, "activation", "linear");
680                     if (activation == "leaky")
681                     {
682                         setParams.setReLU();
683                     }
684                     else if (activation != "linear")
685                         CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
686
687                     net->out_channels_vec[layers_counter] = current_channels;
688                 }
689
690                 return true;
691             }
692
693             bool ReadDarknetFromWeightsStream(std::istream &ifile, NetParameter *net)
694             {
695                 int32_t major_ver, minor_ver, revision;
696                 ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t));
697                 ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t));
698                 ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t));
699
700                 uint64_t seen;
701                 if ((major_ver * 10 + minor_ver) >= 2) {
702                     ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t));
703                 }
704                 else {
705                     int32_t iseen = 0;
706                     ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t));
707                     seen = iseen;
708                 }
709                 bool transpose = (major_ver > 1000) || (minor_ver > 1000);
710                 if(transpose)
711                     CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented");
712
713                 int current_channels = net->channels;
714                 int cv_layers_counter = -1;
715                 int darknet_layers_counter = -1;
716
717                 setLayersParams setParams(net);
718
719                 typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
720                 for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
721                     ++darknet_layers_counter;
722                     ++cv_layers_counter;
723                     std::map<std::string, std::string> &layer_params = i->second;
724                     std::string layer_type = layer_params["type"];
725
726                     if (layer_type == "convolutional")
727                     {
728                         int kernel_size = getParam<int>(layer_params, "size", -1);
729                         int filters = getParam<int>(layer_params, "filters", -1);
730                         bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
731
732                         CV_Assert(kernel_size > 0 && filters > 0);
733                         CV_Assert(current_channels > 0);
734
735                         size_t const weights_size = filters * current_channels * kernel_size * kernel_size;
736                         int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size };
737                         cv::Mat weightsBlob;
738                         weightsBlob.create(4, sizes_weights, CV_32F);
739                         CV_Assert(weightsBlob.isContinuous());
740
741                         cv::Mat meanData_mat(1, filters, CV_32F);       // mean
742                         cv::Mat stdData_mat(1, filters, CV_32F);        // variance
743                         cv::Mat weightsData_mat(1, filters, CV_32F);// scale
744                         cv::Mat biasData_mat(1, filters, CV_32F);       // bias
745
746                         ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters);
747                         if (use_batch_normalize) {
748                             ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters);
749                             ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters);
750                             ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters);
751                         }
752                         ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);
753
754                         // set convolutional weights
755                         std::vector<cv::Mat> conv_blobs;
756                         conv_blobs.push_back(weightsBlob);
757                         if (!use_batch_normalize) {
758                             // use BIAS in any case
759                             conv_blobs.push_back(biasData_mat);
760                         }
761                         setParams.setLayerBlobs(cv_layers_counter, conv_blobs);
762
763                         // set batch normalize (mean, variance, scale, bias)
764                         if (use_batch_normalize) {
765                             ++cv_layers_counter;
766                             std::vector<cv::Mat> bn_blobs;
767                             bn_blobs.push_back(meanData_mat);
768                             bn_blobs.push_back(stdData_mat);
769                             bn_blobs.push_back(weightsData_mat);
770                             bn_blobs.push_back(biasData_mat);
771                             setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
772                         }
773                     }
774                     if (layer_type == "region" || layer_type == "yolo")
775                     {
776                         ++cv_layers_counter;  // For permute.
777                     }
778
779                     std::string activation = getParam<std::string>(layer_params, "activation", "linear");
780                     if(activation == "leaky")
781                         ++cv_layers_counter;  // For ReLU
782
783                     current_channels = net->out_channels_vec[darknet_layers_counter];
784                 }
785                 return true;
786             }
787
788         }
789
790
791         void ReadNetParamsFromCfgStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
792         {
793             if (!darknet::ReadDarknetFromCfgStream(ifile, net)) {
794                 CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
795             }
796         }
797
798         void ReadNetParamsFromBinaryStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
799         {
800             if (!darknet::ReadDarknetFromWeightsStream(ifile, net)) {
801                 CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
802             }
803         }
804     }
805 }