1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
12 // (3-clause BSD License)
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistributions of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistributions in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * Neither the names of the copyright holders nor the names of the contributors
28 // may be used to endorse or promote products derived from this software
29 // without specific prior written permission.
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall copyright holders or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
44 /*M///////////////////////////////////////////////////////////////////////////////////////
47 //Copyright (c) 2017 Joseph Redmon
49 //Permission is hereby granted, free of charge, to any person obtaining a copy
50 //of this software and associated documentation files (the "Software"), to deal
51 //in the Software without restriction, including without limitation the rights
52 //to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
53 //copies of the Software, and to permit persons to whom the Software is
54 //furnished to do so, subject to the following conditions:
56 //The above copyright notice and this permission notice shall be included in all
57 //copies or substantial portions of the Software.
59 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
60 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
61 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
62 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
63 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
64 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
69 #include <opencv2/core.hpp>
75 #include "darknet_io.hpp"
// Look up `param_name` in a string->string key/value map and parse its value
// into T via std::stringstream; returns `init_val` when the key is absent.
// NOTE(review): "¶ms" below is mojibake for "&params" (HTML-entity
// corruption of '&') — must be restored before this compiles. The
// `template<typename T>` header and the tail of the body (stream extraction
// into a T and the returns) are missing from this excerpt.
82 T getParam(const std::map<std::string, std::string> ¶ms, const std::string param_name, T init_val)
84 std::map<std::string, std::string>::const_iterator it = params.find(param_name);
85 if (it != params.end()) {
// Key found: parse the stored string into T (continuation lines missing here).
86 std::stringstream ss(it->second);
// Incremental builder for the OpenCV dnn net description parsed from a
// Darknet model: each set*() method appends one darknet::LayerParameter to
// net->layers, wires its input to `last_layer`, and records the name of the
// layer that terminates each Darknet layer in `fused_layer_names` (so that
// "route" indices can be resolved to OpenCV layer names later).
// NOTE(review): other members (`net`, `layer_id` — both initialized in the
// ctor below), access specifiers, and braces are missing from this excerpt.
92 class setLayersParams {
// Name of the most recently appended layer; next layer's bottom/input.
96 std::string last_layer;
// One entry per Darknet layer: the OpenCV-side output layer name for it.
97 std::vector<std::string> fused_layer_names;
// Starts the chain from the network input blob, named "data" by default.
100 setLayersParams(NetParameter *_net, std::string _first_layer = "data") :
101 net(_net), layer_id(0), last_layer(_first_layer)
// Attach trained weight blobs to the i-th already-appended layer.
// Used by ReadDarknetFromWeightsFile after the topology has been built.
// NOTE(review): "¶ms" below is mojibake for "&params" — restore before
// compiling; the closing brace is missing from this excerpt.
104 void setLayerBlobs(int i, std::vector<cv::Mat> blobs)
106 cv::dnn::experimental_dnn_v1::LayerParams ¶ms = net->layers[i].layerParams;
107 params.blobs = blobs;
// Build the LayerParams for a Convolution layer with square kernel `kernel`,
// symmetric padding `pad`, stride `stride` and `filters_num` output channels.
// bias_term defaults to false here; setConvolution() flips it to true when
// the layer has no batch normalization.
// NOTE(review): the `return params;` line and braces are missing from this
// excerpt.
110 cv::dnn::experimental_dnn_v1::LayerParams getParamConvolution(int kernel, int pad,
111 int stride, int filters_num)
113 cv::dnn::experimental_dnn_v1::LayerParams params;
114 params.name = "Convolution-name";
115 params.type = "Convolution";
117 params.set<int>("kernel_size", kernel);
118 params.set<int>("pad", pad);
119 params.set<int>("stride", stride);
121 params.set<bool>("bias_term", false); // true only if(BatchNorm == false)
122 params.set<int>("num_output", filters_num);
// Append one Darknet "convolutional" layer as up to three OpenCV layers:
// Convolution, then (optionally) BatchNorm, then (optionally) leaky ReLU.
// Only the name of the last of these is pushed to fused_layer_names, so a
// "route" referencing this Darknet layer picks up the fully activated output.
// NOTE(review): this excerpt is sampled — `layer_id` increments, the
// `if (use_relu)` guard around the ReLU section, and closing braces are
// missing; presumably they sit on the elided lines.
128 void setConvolution(int kernel, int pad, int stride,
129 int filters_num, int channels_num, int use_batch_normalize, int use_relu)
131 cv::dnn::experimental_dnn_v1::LayerParams conv_param =
132 getParamConvolution(kernel, pad, stride, filters_num);
134 darknet::LayerParameter lp;
135 std::string layer_name = cv::format("conv_%d", layer_id);
137 // use BIAS in any case
138 if (!use_batch_normalize) {
// Without BN the conv layer itself carries the bias blob.
139 conv_param.set<bool>("bias_term", true);
142 lp.layer_name = layer_name;
143 lp.layer_type = conv_param.type;
144 lp.layerParams = conv_param;
// Chain onto the previous layer, then make this layer the new chain tail.
145 lp.bottom_indexes.push_back(last_layer);
146 last_layer = layer_name;
147 net->layers.push_back(lp);
149 if (use_batch_normalize)
// BatchNorm with learned scale + bias (4 blobs set later from the weights
// file: mean, variance, scale, bias).
151 cv::dnn::experimental_dnn_v1::LayerParams bn_param;
153 bn_param.name = "BatchNorm-name";
154 bn_param.type = "BatchNorm";
155 bn_param.set<bool>("has_weight", true);
156 bn_param.set<bool>("has_bias", true);
157 bn_param.set<float>("eps", 1E-6); // .000001f in Darknet Yolo
159 darknet::LayerParameter lp;
160 std::string layer_name = cv::format("bn_%d", layer_id);
161 lp.layer_name = layer_name;
162 lp.layer_type = bn_param.type;
163 lp.layerParams = bn_param;
164 lp.bottom_indexes.push_back(last_layer);
165 last_layer = layer_name;
166 net->layers.push_back(lp);
// Leaky ReLU with Darknet's fixed 0.1 negative slope.
// NOTE(review): the guarding `if (use_relu)` line is on an elided line.
171 cv::dnn::experimental_dnn_v1::LayerParams activation_param;
172 activation_param.set<float>("negative_slope", 0.1f);
173 activation_param.name = "ReLU-name";
174 activation_param.type = "ReLU";
176 darknet::LayerParameter lp;
177 std::string layer_name = cv::format("relu_%d", layer_id);
178 lp.layer_name = layer_name;
179 lp.layer_type = activation_param.type;
180 lp.layerParams = activation_param;
181 lp.bottom_indexes.push_back(last_layer);
182 last_layer = layer_name;
183 net->layers.push_back(lp);
// Record the terminal layer of this Darknet layer for "route" lookups.
187 fused_layer_names.push_back(last_layer);
// Append a max-Pooling layer mirroring a Darknet "maxpool" section.
// pad_mode "SAME" reproduces Darknet's output-size rounding behavior.
// NOTE(review): closing brace and any `layer_id` increment are on elided
// lines of this sampled excerpt.
190 void setMaxpool(size_t kernel, size_t pad, size_t stride)
192 cv::dnn::experimental_dnn_v1::LayerParams maxpool_param;
193 maxpool_param.set<cv::String>("pool", "max");
194 maxpool_param.set<int>("kernel_size", kernel);
195 maxpool_param.set<int>("pad", pad);
196 maxpool_param.set<int>("stride", stride);
197 maxpool_param.set<cv::String>("pad_mode", "SAME");
198 maxpool_param.name = "Pooling-name";
199 maxpool_param.type = "Pooling";
200 darknet::LayerParameter lp;
202 std::string layer_name = cv::format("pool_%d", layer_id);
203 lp.layer_name = layer_name;
204 lp.layer_type = maxpool_param.type;
205 lp.layerParams = maxpool_param;
206 lp.bottom_indexes.push_back(last_layer);
207 last_layer = layer_name;
208 net->layers.push_back(lp);
210 fused_layer_names.push_back(last_layer);
// Append a Concat layer for a multi-input Darknet "route" section.
// `input_indexes` are Darknet layer indices (already made absolute by the
// caller); they are resolved to OpenCV layer names via fused_layer_names.
// Concatenation is along axis 1 = channels (NCHW layout).
131 (see below)
// Append an Identity layer for a single-input Darknet "route" section:
// it simply forwards the output of the layer at `bottom_index`
// (a Darknet layer index resolved through fused_layer_names).
// NOTE(review): closing brace / layer_id increment are on elided lines.
236 void setIdentity(int bottom_index)
238 cv::dnn::experimental_dnn_v1::LayerParams identity_param;
239 identity_param.name = "Identity-name";
240 identity_param.type = "Identity";
242 darknet::LayerParameter lp;
244 std::string layer_name = cv::format("identity_%d", layer_id);
245 lp.layer_name = layer_name;
246 lp.layer_type = identity_param.type;
247 lp.layerParams = identity_param;
248 lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index));
250 last_layer = layer_name;
251 net->layers.push_back(lp);
254 fused_layer_names.push_back(last_layer);
// Append a Reorg layer (YOLOv2 space-to-channel shuffle): rearranges a
// stride x stride spatial block into channels, multiplying channel count by
// stride^2 (the caller updates current_channels accordingly).
// NOTE(review): closing brace / layer_id increment are on elided lines.
257 void setReorg(int stride)
259 cv::dnn::experimental_dnn_v1::LayerParams reorg_params;
260 reorg_params.name = "Reorg-name";
261 reorg_params.type = "Reorg";
262 reorg_params.set<int>("reorg_stride", stride);
264 darknet::LayerParameter lp;
265 std::string layer_name = cv::format("reorg_%d", layer_id);
266 lp.layer_name = layer_name;
267 lp.layer_type = reorg_params.type;
268 lp.layerParams = reorg_params;
269 lp.bottom_indexes.push_back(last_layer);
270 last_layer = layer_name;
272 net->layers.push_back(lp);
275 fused_layer_names.push_back(last_layer);
// Append a Permute layer reordering NCHW -> NHWC (order 0,2,3,1); inserted
// right before the Region layer, which expects channel-last predictions.
// NOTE(review): the `void setPermute()` signature line is missing from this
// sampled excerpt. The layer-name format "premute_%d" is a long-standing
// typo for "permute" — it is a runtime identifier, so it is left unchanged.
280 cv::dnn::experimental_dnn_v1::LayerParams permute_params;
281 permute_params.name = "Permute-name";
282 permute_params.type = "Permute";
283 int permute[] = { 0, 2, 3, 1 };
284 cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4);
286 permute_params.set("order", paramOrder);
288 darknet::LayerParameter lp;
289 std::string layer_name = cv::format("premute_%d", layer_id);
290 lp.layer_name = layer_name;
291 lp.layer_type = permute_params.type;
292 lp.layerParams = permute_params;
293 lp.bottom_indexes.push_back(last_layer);
294 last_layer = layer_name;
295 net->layers.push_back(lp);
298 fused_layer_names.push_back(last_layer);
// Append the final YOLOv2 Region (detection output) layer. `biasData` holds
// the anchor box priors (2 floats per anchor), copied into the layer's blob.
// Note: `softmax` / `softmax_tree` arrive as ints and are stored as bools —
// any nonzero value reads as true.
301 void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData)
303 cv::dnn::experimental_dnn_v1::LayerParams region_param;
304 region_param.name = "Region-name";
305 region_param.type = "Region";
307 region_param.set<float>("thresh", thresh);
308 region_param.set<int>("coords", coords);
309 region_param.set<int>("classes", classes);
310 region_param.set<int>("anchors", anchors);
311 region_param.set<int>("classfix", classfix);
312 region_param.set<bool>("softmax_tree", softmax_tree);
313 region_param.set<bool>("softmax", softmax);
// .clone() so the blob owns its data independently of the caller's buffer.
315 cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone();
316 region_param.blobs.push_back(biasData_mat);
318 darknet::LayerParameter lp;
// Fixed name: consumers fetch detections from the "detection_out" layer.
319 std::string layer_name = "detection_out";
320 lp.layer_name = layer_name;
321 lp.layer_type = region_param.type;
322 lp.layerParams = region_param;
323 lp.bottom_indexes.push_back(last_layer);
324 last_layer = layer_name;
325 net->layers.push_back(lp);
328 fused_layer_names.push_back(last_layer);
// Strip a cfg-file token down to plain printable characters: only bytes in
// ('!'..'z') are kept, which drops spaces, control chars (CR/LF/tabs) and
// high-bit bytes. Used to sanitize every line/name/value read from the cfg.
// NOTE(review): the result accumulation and `return` lines are on elided
// lines of this sampled excerpt.
332 std::string escapeString(const std::string &src)
335 for (size_t i = 0; i < src.size(); ++i)
336 if (src[i] > ' ' && src[i] <= 'z')
// Split a comma-separated string (e.g. Darknet "anchors" or "layers" values)
// and parse each token into T, returning them as a vector.
// NOTE(review): the `template<typename T>` header, the per-token extraction
// from `line` into the vector, and the `return` are on elided lines.
342 std::vector<T> getNumbers(const std::string &src)
345 std::stringstream ss(src);
347 for (std::string str; std::getline(ss, str, ',');) {
348 std::stringstream line(str);
// Parse a Darknet .cfg file into `net`: first pass reads "[section]" headers
// and "key=value" lines into net->net_cfg ([net] section) and
// net->layers_cfg (per-layer maps keyed by layer index); second pass walks
// layers_cfg in order and emits OpenCV layers via setLayersParams, tracking
// the running channel count in current_channels / out_channels_vec.
// NOTE(review): this excerpt is sampled — the ifstream setup, section/value
// dispatch branches, several `else` lines, closing braces and the final
// `return` are missing; the visible lines are byte-identical to the source.
356 bool ReadDarknetFromCfgFile(const char *cfgFile, NetParameter *net)
362 bool read_net = false;
363 int layers_counter = -1;
// ---- Pass 1: tokenize the cfg file into net_cfg / layers_cfg maps. ----
364 for (std::string line; std::getline(ifile, line);) {
365 line = escapeString(line);
366 if (line.empty()) continue;
372 if (line == "[net]") {
// A "[type]" header opens a new layer section; extract the name between
// the brackets. (find("]") on a headerless line would yield npos-1, but
// the CV_Assert below rejects that.)
379 const size_t layer_type_size = line.find("]") - 1;
380 CV_Assert(layer_type_size < line.size());
381 std::string layer_type = line.substr(1, layer_type_size);
382 net->layers_cfg[layers_counter]["type"] = layer_type;
// key=value line: split on '=', sanitize both halves.
387 const size_t separator_index = line.find('=');
388 CV_Assert(separator_index < line.size());
389 if (separator_index != std::string::npos) {
390 std::string name = line.substr(0, separator_index);
391 std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1));
392 name = escapeString(name);
393 value = escapeString(value);
394 if (name.empty() || value.empty()) continue;
// Store under [net] globals or the current layer section (the branch
// choosing between these two lines is on an elided line).
396 net->net_cfg[name] = value;
398 net->layers_cfg[layers_counter][name] = value;
// Global network geometry, with Darknet's usual 416x416x3 defaults.
403 std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"];
404 std::vector<float> vec = getNumbers<float>(anchors);
405 std::map<std::string, std::string> &net_params = net->net_cfg;
406 net->width = getParam(net_params, "width", 416);
407 net->height = getParam(net_params, "height", 416);
408 net->channels = getParam(net_params, "channels", 3);
409 CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);
// ---- Pass 2: build OpenCV layers from the parsed sections. ----
414 int current_channels = net->channels;
415 net->out_channels_vec.resize(net->layers_cfg.size());
417 int layers_counter = -1;
419 setLayersParams setParams(net);
421 typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
422 for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
424 std::map<std::string, std::string> &layer_params = i->second;
425 std::string layer_type = layer_params["type"];
427 if (layer_type == "convolutional")
429 int kernel_size = getParam<int>(layer_params, "size", -1);
430 int pad = getParam<int>(layer_params, "pad", 0);
431 int stride = getParam<int>(layer_params, "stride", 1);
432 int filters = getParam<int>(layer_params, "filters", -1);
433 std::string activation = getParam<std::string>(layer_params, "activation", "linear");
434 bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
435 if(activation != "linear" && activation != "leaky")
436 CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
// flipped!=0 would require transposing weights — not supported (the
// guarding `if` for the CV_Error below is on an elided line).
437 int flipped = getParam<int>(layer_params, "flipped", 0);
439 CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
441 // correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file
442 if (kernel_size < 3) pad = 0;
444 CV_Assert(kernel_size > 0 && filters > 0);
445 CV_Assert(current_channels > 0);
447 setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
448 batch_normalize, activation == "leaky");
450 current_channels = filters;
452 else if (layer_type == "maxpool")
454 int kernel_size = getParam<int>(layer_params, "size", 2);
455 int stride = getParam<int>(layer_params, "stride", 2);
456 int pad = getParam<int>(layer_params, "pad", 0);
457 setParams.setMaxpool(kernel_size, pad, stride);
459 else if (layer_type == "route")
461 std::string bottom_layers = getParam<std::string>(layer_params, "layers", "");
462 CV_Assert(!bottom_layers.empty());
463 std::vector<int> layers_vec = getNumbers<int>(bottom_layers);
// Darknet route indices are relative (negative) offsets; convert to
// absolute layer indices and sum the referenced channel counts.
465 current_channels = 0;
466 for (size_t k = 0; k < layers_vec.size(); ++k) {
467 layers_vec[k] += layers_counter;
468 current_channels += net->out_channels_vec[layers_vec[k]];
// One input -> passthrough Identity; several -> channel Concat.
471 if (layers_vec.size() == 1)
472 setParams.setIdentity(layers_vec.at(0));
474 setParams.setConcat(layers_vec.size(), layers_vec.data());
476 else if (layer_type == "reorg")
478 int stride = getParam<int>(layer_params, "stride", 2);
// Space-to-channel: channel count grows by stride^2.
479 current_channels = current_channels * (stride*stride);
481 setParams.setReorg(stride);
483 else if (layer_type == "region")
485 float thresh = 0.001; // in the original Darknet is equal to the detection threshold set by the user
486 int coords = getParam<int>(layer_params, "coords", 4);
487 int classes = getParam<int>(layer_params, "classes", -1);
488 int num_of_anchors = getParam<int>(layer_params, "num", -1);
489 int classfix = getParam<int>(layer_params, "classfix", 0);
490 bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1);
491 bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0);
493 std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
494 CV_Assert(!anchors_values.empty());
495 std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
497 CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
// Region expects NHWC input, hence the Permute inserted first.
499 setParams.setPermute();
500 setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data());
// Fallthrough for unrecognized section types (guarding `else` elided).
503 CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
505 net->out_channels_vec[layers_counter] = current_channels;
// Load a Darknet .weights binary and attach blobs to the layers previously
// created by ReadDarknetFromCfgFile. The file layout is: 3 x int32 version
// header, a "seen images" counter (uint64 for version >= 0.2, else int32),
// then per-convolutional-layer: bias[filters], (if BN: scale/mean/variance
// each [filters]), weights[filters*ch*k*k], all little-endian float32.
// NOTE(review): this excerpt is sampled — the ifstream declaration, `seen` /
// `iseen` declarations, the weightsBlob declaration, several guards/braces
// and the final `return` are missing; visible lines are byte-identical.
512 bool ReadDarknetFromWeightsFile(const char *darknetModel, NetParameter *net)
515 ifile.open(darknetModel, std::ios::binary);
516 CV_Assert(ifile.is_open());
518 int32_t major_ver, minor_ver, revision;
519 ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t));
520 ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t));
521 ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t));
// Format change at version 0.2: the image counter widened to 64 bits.
524 if ((major_ver * 10 + minor_ver) >= 2) {
525 ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t));
529 ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t));
// Absurd version numbers signal transposed (row-major) weights — Darknet
// convention; unsupported here (guarding `if (transpose)` elided).
532 bool transpose = (major_ver > 1000) || (minor_ver > 1000);
534 CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented");
536 int current_channels = net->channels;
// cv_layers_counter indexes OpenCV layers (conv/bn/relu each count);
// darknet_layers_counter indexes original cfg sections.
537 int cv_layers_counter = -1;
538 int darknet_layers_counter = -1;
540 setLayersParams setParams(net);
542 typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
543 for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
544 ++darknet_layers_counter;
546 std::map<std::string, std::string> &layer_params = i->second;
547 std::string layer_type = layer_params["type"];
// Only convolutional sections carry weights in the file.
549 if (layer_type == "convolutional")
551 int kernel_size = getParam<int>(layer_params, "size", -1);
552 int filters = getParam<int>(layer_params, "filters", -1);
553 std::string activation = getParam<std::string>(layer_params, "activation", "linear");
554 bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
556 CV_Assert(kernel_size > 0 && filters > 0);
557 CV_Assert(current_channels > 0);
559 size_t const weights_size = filters * current_channels * kernel_size * kernel_size;
560 int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size };
562 weightsBlob.create(4, sizes_weights, CV_32F);
563 CV_Assert(weightsBlob.isContinuous());
565 cv::Mat meanData_mat(1, filters, CV_32F); // mean
566 cv::Mat stdData_mat(1, filters, CV_32F); // variance
567 cv::Mat weightsData_mat(1, filters, CV_32F);// scale
568 cv::Mat biasData_mat(1, filters, CV_32F); // bias
// File order: bias, then (if BN) scale/mean/variance, then conv weights.
570 ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters);
571 if (use_batch_normalize) {
572 ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters);
573 ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters);
574 ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters);
576 ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);
578 // set convolutional weights
579 std::vector<cv::Mat> conv_blobs;
580 conv_blobs.push_back(weightsBlob);
581 if (!use_batch_normalize) {
582 // use BIAS in any case
583 conv_blobs.push_back(biasData_mat);
// NOTE(review): a `++cv_layers_counter` presumably sits on an elided line
// before each setLayerBlobs call — confirm against the full source.
585 setParams.setLayerBlobs(cv_layers_counter, conv_blobs);
587 // set batch normalize (mean, variance, scale, bias)
588 if (use_batch_normalize) {
590 std::vector<cv::Mat> bn_blobs;
591 bn_blobs.push_back(meanData_mat);
592 bn_blobs.push_back(stdData_mat);
593 bn_blobs.push_back(weightsData_mat);
594 bn_blobs.push_back(biasData_mat);
595 setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
// Leaky activation adds one more OpenCV layer to skip in the counter
// (the body of this `if` is on an elided line).
598 if(activation == "leaky")
601 current_channels = net->out_channels_vec[darknet_layers_counter];
// Wrapper: parse the cfg file or raise cv::Error (StsParseError) on failure.
// NOTE(review): closing braces are missing from this sampled excerpt.
609 void ReadNetParamsFromCfgFileOrDie(const char *cfgFile, darknet::NetParameter *net)
611 if (!darknet::ReadDarknetFromCfgFile(cfgFile, net)) {
612 CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter file: " + std::string(cfgFile));
// Wrapper: load the .weights file or raise cv::Error (StsParseError).
// NOTE(review): closing braces are missing from this sampled excerpt.
616 void ReadNetParamsFromBinaryFileOrDie(const char *darknetModel, darknet::NetParameter *net)
618 if (!darknet::ReadDarknetFromWeightsFile(darknetModel, net)) {
619 CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter file: " + std::string(darknetModel));