1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
6 * @brief a header file with common samples functionality
29 #include <ie_plugin_dispatcher.hpp>
30 #include <ie_plugin_ptr.hpp>
31 #include <cpp/ie_cnn_net_reader.h>
32 #include <cpp/ie_infer_request.hpp>
33 #include <ie_device.hpp>
40 #define UNUSED __attribute__((unused))
45 * @brief This class represents a console error listener.
48 class ConsoleErrorListener : public InferenceEngine::IErrorListener {
50 * @brief The plugin calls this method with a null terminated error message (in case of error)
51 * @param msg Error message
53 void onError(const char *msg) noexcept override {
54 std::clog << "Plugin message: " << msg << std::endl;
59 * @brief Trims from both ends (in place)
60 * @param s - string to trim
61 * @return trimmed string
/**
 * @brief Trims whitespace from both ends of a string (in place)
 * @param s - string to trim; it is modified by the call
 * @return reference to the same (now trimmed) string
 */
inline std::string &trim(std::string &s) {
    // std::ptr_fun / std::not1 no longer exist in current standard libraries, and handing a plain
    // (possibly negative) char to std::isspace is undefined behaviour. A lambda that takes the
    // character as unsigned char solves both problems.
    const auto notSpace = [](unsigned char ch) { return std::isspace(ch) == 0; };

    // Drop leading whitespace, then trailing whitespace.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), notSpace));
    s.erase(std::find_if(s.rbegin(), s.rend(), notSpace).base(), s.end());
    return s;
}
70 * @brief Converts string to TargetDevice
71 * @param deviceName - string value representing device
72 * @return TargetDevice value that corresponds to input string.
73 * eDefault in case no corresponding value was found
75 static InferenceEngine::TargetDevice getDeviceFromStr(const std::string &deviceName) {
76 return InferenceEngine::TargetDeviceInfo::fromStr(deviceName);
80 * @brief Loads plugin from directories
81 * @param pluginDirs - plugin paths
82 * @param plugin - plugin name
83 * @param device - device to infer on
84 * @return Plugin pointer
86 static InferenceEngine::InferenceEnginePluginPtr selectPlugin(const std::vector<file_name_t> &pluginDirs,
87 const file_name_t &plugin,
88 InferenceEngine::TargetDevice device) {
89 InferenceEngine::PluginDispatcher dispatcher(pluginDirs);
91 if (!plugin.empty()) {
92 return dispatcher.getPluginByName(plugin);
94 return dispatcher.getSuitablePlugin(device);
99 * @brief Loads plugin from directories
100 * @param pluginDirs - plugin paths
101 * @param plugin - plugin name
102 * @param device - string representation of device to infer on
103 * @return Plugin pointer
105 static UNUSED InferenceEngine::InferenceEnginePluginPtr selectPlugin(const std::vector<file_name_t> &pluginDirs,
106 const file_name_t &plugin,
107 const std::string &device) {
108 return selectPlugin(pluginDirs, plugin, getDeviceFromStr(device));
112 * @brief Gets filename without extension
113 * @param filepath - full file name
114 * @return filename without extension
/**
 * @brief Gets filename without extension
 * @param filepath - full file name (may contain '/' or '\\' separated directories)
 * @return filepath with the final ".ext" removed; returned unchanged when the last path
 *         component contains no '.'
 */
static UNUSED std::string fileNameNoExt(const std::string &filepath) {
    const auto dot = filepath.rfind('.');
    if (dot == std::string::npos) return filepath;

    // A dot that sits before the last path separator belongs to a directory name
    // ("./net", "models.v2/net"), not to an extension, so there is nothing to strip.
    const auto sep = filepath.find_last_of("/\\");
    if (sep != std::string::npos && dot < sep) return filepath;

    return filepath.substr(0, dot);
}
123 * @brief Get extension from filename
124 * @param filename - name of the file which extension should be extracted
125 * @return string with extracted file extension
/**
 * @brief Get extension from filename
 * @param filename - name of the file which extension should be extracted
 * @return the text after the last '.' of the last path component; empty string when that
 *         component has no '.'
 */
inline std::string fileExt(const std::string& filename) {
    const auto dot = filename.rfind('.');
    if (dot == std::string::npos) return "";

    // Ignore dots that are part of a directory name ("models.v2/net" has no extension).
    const auto sep = filename.find_last_of("/\\");
    if (sep != std::string::npos && dot < sep) return "";

    return filename.substr(dot + 1);
}
// Streams a textual summary of an InferenceEngine::Version: the API version as "major.minor",
// then the build number and the description when those pointers are non-null.
// NOTE(review): the body of the nullptr branch, the closing braces and the return statement are
// not visible in this view — confirm against the full file.
133 static UNUSED std::ostream &operator<<(std::ostream &os, const InferenceEngine::Version *version) {
134 os << "\n\tAPI version ............ ";
// A null version pointer is handled separately from the normal path below.
135 if (nullptr == version) {
138 os << version->apiVersion.major << "." << version->apiVersion.minor;
// buildNumber and description are only printed when they are set.
139 if (nullptr != version->buildNumber) {
140 os << "\n\t" << "Build .................. " << version->buildNumber;
142 if (nullptr != version->description) {
143 os << "\n\t" << "Description ....... " << version->description;
150 * @class PluginVersion
151 * @brief A PluginVersion class stores plugin version and initialization status
// Extends InferenceEngine::Version with an `initialized` flag and a conversion to bool.
// NOTE(review): parts of the constructor and the body of operator bool are not visible in this
// view; presumably the flag is set once a non-null version has been copied — confirm.
153 struct PluginVersion : public InferenceEngine::Version {
// Defaults to false.
154 bool initialized = false;
// Builds the object from a (possibly null) version pointer.
156 explicit PluginVersion(const InferenceEngine::Version *ver) {
157 if (nullptr == ver) {
// Copies the base-class Version fields from *ver.
160 InferenceEngine::Version::operator=(*ver);
// NOTE(review): non-explicit conversion operator, so the object converts to bool implicitly.
164 operator bool() const noexcept {
// Streams a three-part plugin summary: version, name (taken from `description`) and build
// (taken from `buildNumber`).
// NOTE(review): the bodies of the fallback branches and the return statement are not visible in
// this view.
169 static UNUSED std::ostream &operator<<(std::ostream &os, const PluginVersion &version) {
170 os << "\tPlugin version ......... ";
174 os << version.apiVersion.major << "." << version.apiVersion.minor;
177 os << "\n\tPlugin name ............ ";
// `!version` goes through PluginVersion::operator bool; the null check guards the C string.
178 if (!version || version.description == nullptr) {
181 os << version.description;
184 os << "\n\tPlugin build ........... ";
185 if (!version || version.buildNumber == nullptr) {
188 os << version.buildNumber;
194 inline void printPluginVersion(InferenceEngine::InferenceEnginePluginPtr ptr, std::ostream& stream) {
195 const InferenceEngine::Version *pluginVersion = nullptr;
196 ptr->GetVersion(pluginVersion);
197 stream << pluginVersion << std::endl;
// Builds an H x W table (outer vector = rows) from a float blob, one entry per pixel, by scanning
// the C channel values of that pixel for the largest one.
// The blob dimensions are also reported through pWidth / pHeight / pChannels when those pointers
// are non-null.
// NOTE(review): the last parameter line, the declaration and update of `index` and the return
// statement are not visible in this view; presumably `index` is the channel holding the largest
// value — confirm.
200 static UNUSED std::vector<std::vector<size_t>> blobToImageOutputArray(InferenceEngine::TBlob<float>::Ptr output,
201 size_t *pWidth, size_t *pHeight,
203 std::vector<std::vector<size_t>> outArray;
// dims() is read as {width, height, channels}; at() throws if fewer than three dimensions exist.
204 size_t W = output->dims().at(0);
205 size_t H = output->dims().at(1);
206 size_t C = output->dims().at(2);
209 const float *outData = output->data();
210 for (unsigned h = 0; h < H; h++) {
211 std::vector<size_t> row;
212 for (unsigned w = 0; w < W; w++) {
// Start from channel 0 of this pixel, then compare against channels 1..C-1.
213 float max_value = outData[h * W + w];
215 for (size_t c = 1; c < C; c++) {
// Planar indexing: channel plane first, then row, then column.
216 size_t dataIndex = c * H * W + h * W + w;
217 if (outData[dataIndex] > max_value) {
219 max_value = outData[dataIndex];
222 row.push_back(index);
224 outArray.push_back(row);
// Optional out-parameters.
227 if (pWidth != nullptr) *pWidth = W;
228 if (pHeight != nullptr) *pHeight = H;
229 if (pChannels != nullptr) *pChannels = C;
236 * @brief A Color class stores channels of a given color
246 * Constructs a color from its three channel values.
247 * @param r - value for red channel
248 * @param g - value for green channel
249 * @param b - value for blue channel
// Constructor: stores the three channel values in _r, _g and _b.
// NOTE(review): the class header, the member declarations, the `g` parameter line and the getter
// bodies are not visible in this view.
251 Color(unsigned char r,
253 unsigned char b) : _r(r), _g(g), _b(b) {}
// Channel getters. NOTE(review): they are not const-qualified, so they cannot be called on a
// const Color.
255 inline unsigned char red() {
259 inline unsigned char blue() {
263 inline unsigned char green() {
268 // TODO : keep only one version of writeOutputBMP
271 * @brief Writes output data to image
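272 * @param data - output data: one color-table index per pixel, stored row by row
273 * @param classesNum - the number of classes
274 * @param outFile - output stream the image is written to
275 * @note Nothing is returned; THROW_IE_EXCEPTION is raised when the image is too big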
// Renders a 2-D table of class indices as a 24-bit BMP image written to outFile. Every index
// selects one entry of the `colors` table, and that entry's three channels form the pixel.
// NOTE(review): the colour table entries, two header rows and the closing braces are not visible
// in this view.
277 static UNUSED void writeOutputBmp(std::vector<std::vector<size_t>> data, size_t classesNum, std::ostream &outFile) {
278 unsigned int seed = (unsigned int) time(NULL);
279 // Known colors for training classes from Cityscape dataset
// The table is static, so colors appended below persist across calls.
280 static std::vector<Color> colors = {
// Grow the table with random colors until it holds at least classesNum entries. The generator is
// static as well, so `seed` only matters on the first call that reaches this loop.
304 while (classesNum > colors.size()) {
305 static std::mt19937 rng(seed);
306 std::uniform_int_distribution<int> dist(0, 255);
307 Color color(dist(rng), dist(rng), dist(rng));
308 colors.push_back(color);
// 14-byte BMP file header; the size field is filled in below.
311 unsigned char file[14] = {
313 0, 0, 0, 0, // size in bytes
316 40 + 14, 0, 0, 0 // start of data offset
// 40-byte info header; width, height and image size are filled in below.
318 unsigned char info[40] = {
319 40, 0, 0, 0, // info hd size
321 0, 0, 0, 0, // height
322 1, 0, // number color planes
323 24, 0, // bits per pixel
324 0, 0, 0, 0, // compression is none
325 0, 0, 0, 0, // image bits size
326 0x13, 0x0B, 0, 0, // horz resolution in pixel / m
327 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
328 0, 0, 0, 0, // #colors in palette
329 0, 0, 0, 0, // #important colors
332 auto height = data.size();
// at(0) throws std::out_of_range when data is empty.
333 auto width = data.at(0).size();
// NOTE(review): `std::numeric_limits<int32_t>::max` is used without call parentheses, so the cast
// is applied to the function itself rather than to the maximum value — this size guard looks
// ineffective; it should read `max()`.
335 if (height > (size_t) std::numeric_limits<int32_t>::max || width > (size_t) std::numeric_limits<int32_t>::max) {
336 THROW_IE_EXCEPTION << "File size is too big: " << height << " X " << width;
// Each row is padded to a multiple of 4 bytes.
// NOTE(review): the sizes are computed in int and may overflow for very large images.
339 int padSize = static_cast<int>(4 - (width * 3) % 4) % 4;
340 int sizeData = static_cast<int>(width * height * 3 + height * padSize);
341 int sizeAll = sizeData + sizeof(file) + sizeof(info);
// Total file size, low byte first.
343 file[2] = (unsigned char) (sizeAll);
344 file[3] = (unsigned char) (sizeAll >> 8);
345 file[4] = (unsigned char) (sizeAll >> 16);
346 file[5] = (unsigned char) (sizeAll >> 24);
// Image width, low byte first.
348 info[4] = (unsigned char) (width);
349 info[5] = (unsigned char) (width >> 8);
350 info[6] = (unsigned char) (width >> 16);
351 info[7] = (unsigned char) (width >> 24);
// The height is stored negated; in the BMP format a negative height marks rows as top-to-bottom.
353 int32_t negativeHeight = -(int32_t) height;
354 info[8] = (unsigned char) (negativeHeight);
355 info[9] = (unsigned char) (negativeHeight >> 8);
356 info[10] = (unsigned char) (negativeHeight >> 16);
357 info[11] = (unsigned char) (negativeHeight >> 24);
// Size of the pixel data including row padding.
359 info[20] = (unsigned char) (sizeData);
360 info[21] = (unsigned char) (sizeData >> 8);
361 info[22] = (unsigned char) (sizeData >> 16);
362 info[23] = (unsigned char) (sizeData >> 24);
364 outFile.write(reinterpret_cast<char *>(file), sizeof(file));
365 outFile.write(reinterpret_cast<char *>(info), sizeof(info));
367 unsigned char pad[3] = {0, 0, 0};
369 for (size_t y = 0; y < height; y++) {
370 for (size_t x = 0; x < width; x++) {
371 unsigned char pixel[3];
// at() throws if a row is shorter than row 0 or if the index exceeds the color table.
372 size_t index = data.at(y).at(x);
// NOTE(review): the bytes are emitted in red(), green(), blue() order, whereas 24-bit BMP pixels
// are stored blue-green-red — confirm which channel convention the color table uses.
373 pixel[0] = colors.at(index).red();
374 pixel[1] = colors.at(index).green();
375 pixel[2] = colors.at(index).blue();
376 outFile.write(reinterpret_cast<char *>(pixel), 3);
// Row padding.
378 outFile.write(reinterpret_cast<char *>(pad), padSize);
383 * @brief Writes output data to BMP image
384 * @param name - image name
385 * @param data - output data
386 * @param height - height of the target image
387 * @param width - width of the target image
388 * @return false if error else true
390 static UNUSED bool writeOutputBmp(std::string name, unsigned char *data, size_t height, size_t width) {
391 std::ofstream outFile;
392 outFile.open(name, std::ofstream::binary);
393 if (!outFile.is_open()) {
397 unsigned char file[14] = {
399 0, 0, 0, 0, // size in bytes
402 40 + 14, 0, 0, 0 // start of data offset
404 unsigned char info[40] = {
405 40, 0, 0, 0, // info hd size
407 0, 0, 0, 0, // height
408 1, 0, // number color planes
409 24, 0, // bits per pixel
410 0, 0, 0, 0, // compression is none
411 0, 0, 0, 0, // image bits size
412 0x13, 0x0B, 0, 0, // horz resolution in pixel / m
413 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
414 0, 0, 0, 0, // #colors in palette
415 0, 0, 0, 0, // #important colors
418 if (height > (size_t)std::numeric_limits<int32_t>::max || width > (size_t)std::numeric_limits<int32_t>::max) {
419 THROW_IE_EXCEPTION << "File size is too big: " << height << " X " << width;
422 int padSize = static_cast<int>(4 - (width * 3) % 4) % 4;
423 int sizeData = static_cast<int>(width * height * 3 + height * padSize);
424 int sizeAll = sizeData + sizeof(file) + sizeof(info);
426 file[2] = (unsigned char)(sizeAll);
427 file[3] = (unsigned char)(sizeAll >> 8);
428 file[4] = (unsigned char)(sizeAll >> 16);
429 file[5] = (unsigned char)(sizeAll >> 24);
431 info[4] = (unsigned char)(width);
432 info[5] = (unsigned char)(width >> 8);
433 info[6] = (unsigned char)(width >> 16);
434 info[7] = (unsigned char)(width >> 24);
436 int32_t negativeHeight = -(int32_t)height;
437 info[8] = (unsigned char)(negativeHeight);
438 info[9] = (unsigned char)(negativeHeight >> 8);
439 info[10] = (unsigned char)(negativeHeight >> 16);
440 info[11] = (unsigned char)(negativeHeight >> 24);
442 info[20] = (unsigned char)(sizeData);
443 info[21] = (unsigned char)(sizeData >> 8);
444 info[22] = (unsigned char)(sizeData >> 16);
445 info[23] = (unsigned char)(sizeData >> 24);
447 outFile.write(reinterpret_cast<char *>(file), sizeof(file));
448 outFile.write(reinterpret_cast<char *>(info), sizeof(info));
450 unsigned char pad[3] = { 0, 0, 0 };
452 for (size_t y = 0; y < height; y++) {
453 for (size_t x = 0; x < width; x++) {
454 unsigned char pixel[3];
455 pixel[0] = data[y * width * 3 + x * 3];
456 pixel[1] = data[y * width * 3 + x * 3 + 1];
457 pixel[2] = data[y * width * 3 + x * 3 + 2];
459 outFile.write(reinterpret_cast<char *>(pixel), 3);
461 outFile.write(reinterpret_cast<char *>(pad), padSize);
468 * @brief Adds colored rectangles to the image
469 * @param data - data where rectangles are put
470 * @param height - height of the image stored in data
471 * @param width - width of the image stored in data
472 * @param rectangles - vector points for the rectangle, should be 4x compared to num classes
473 * @param classes - vector of classes
474 * @param thickness - thickness of a line (in pixels) to be used for bounding boxes
// Draws rectangle outlines into a 3-bytes-per-pixel image buffer. Rectangle i is described by
// four consecutive ints in `rectangles`, read as x, y, w, h, and is colored by classes[i].
// NOTE(review): the colour table entries, the body of the size-mismatch branch, the declarations
// of shift_first / shift_second and some lines between 510 and 517 are not visible in this view.
476 static UNUSED void addRectangles(unsigned char *data, size_t height, size_t width, std::vector<int> rectangles, std::vector<int> classes, int thickness = 1) {
477 std::vector<Color> colors = { // colors to be used for bounding boxes
// The input is rejected unless there are exactly four coordinates per class entry.
500 if (rectangles.size() % 4 != 0 || rectangles.size() / 4 != classes.size()) {
504 for (size_t i = 0; i < classes.size(); i++) {
505 int x = rectangles.at(i * 4);
506 int y = rectangles.at(i * 4 + 1);
507 int w = rectangles.at(i * 4 + 2);
508 int h = rectangles.at(i * 4 + 3);
// The class id is wrapped around the table size so that any id maps to some color.
510 int cls = classes.at(i) % colors.size(); // color of a bounding box line
// Clamp the origin into the image. A negative coordinate becomes a huge value after the cast to
// size_t and therefore takes this branch too.
// NOTE(review): `thickness` is a by-value parameter, so a change made here carries over to all
// later rectangles of the same call.
517 if (static_cast<std::size_t>(x) >= width) { x = width - 1; w = 0; thickness = 1; }
518 if (static_cast<std::size_t>(y) >= height) { y = height - 1; h = 0; thickness = 1; }
// Shrink the rectangle so that its far edges stay inside the image.
520 if (static_cast<std::size_t>(x + w) >= width) { w = width - x - 1; }
521 if (static_cast<std::size_t>(y + h) >= height) { h = height - y - 1; }
// The line thickness cannot exceed half of the (clamped) rectangle size plus one.
523 thickness = std::min(std::min(thickness, w / 2 + 1), h / 2 + 1);
// Horizontal edges: rows y + t and y + h - t for every thickness step t.
527 for (int t = 0; t < thickness; t++) {
528 shift_first = (y + t) * width * 3;
529 shift_second = (y + h - t) * width * 3;
530 for (int ii = x; ii < x + w + 1; ii++) {
531 data[shift_first + ii * 3] = colors.at(cls).red();
532 data[shift_first + ii * 3 + 1] = colors.at(cls).green();
533 data[shift_first + ii * 3 + 2] = colors.at(cls).blue();
534 data[shift_second + ii * 3] = colors.at(cls).red();
535 data[shift_second + ii * 3 + 1] = colors.at(cls).green();
536 data[shift_second + ii * 3 + 2] = colors.at(cls).blue();
// Vertical edges: columns x + t and x + w - t for every thickness step t.
540 for (int t = 0; t < thickness; t++) {
541 shift_first = (x + t) * 3;
542 shift_second = (x + w - t) * 3;
543 for (int ii = y; ii < y + h + 1; ii++) {
544 data[shift_first + ii * width * 3] = colors.at(cls).red();
545 data[shift_first + ii * width * 3 + 1] = colors.at(cls).green();
546 data[shift_first + ii * width * 3 + 2] = colors.at(cls).blue();
547 data[shift_second + ii * width * 3] = colors.at(cls).red();
548 data[shift_second + ii * width * 3 + 1] = colors.at(cls).green();
549 data[shift_second + ii * width * 3 + 2] = colors.at(cls).blue();
558 * Write output data to image
559 * \param data - output data (3 bytes per pixel, rows stored one after another)
560 * \param height,width - dimensions of the target image
561 * \param outFile - output stream the image is written to
562 * \return false if error else true
565 static UNUSED bool writeOutputBmp(unsigned char *data, size_t height, size_t width, std::ostream &outFile) {
566 unsigned char file[14] = {
568 0, 0, 0, 0, // size in bytes
571 40+14, 0, 0, 0 // start of data offset
573 unsigned char info[40] = {
574 40, 0, 0, 0, // info hd size
576 0, 0, 0, 0, // height
577 1, 0, // number color planes
578 24, 0, // bits per pixel
579 0, 0, 0, 0, // compression is none
580 0, 0, 0, 0, // image bits size
581 0x13, 0x0B, 0, 0, // horz resolution in pixel / m
582 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
583 0, 0, 0, 0, // #colors in palette
584 0, 0, 0, 0, // #important colors
587 if (height > (size_t)std::numeric_limits<int32_t>::max || width > (size_t)std::numeric_limits<int32_t>::max) {
588 THROW_IE_EXCEPTION << "File size is too big: " << height << " X " << width;
591 int padSize = static_cast<int>(4 - (width * 3) % 4) % 4;
592 int sizeData = static_cast<int>(width * height * 3 + height * padSize);
593 int sizeAll = sizeData + sizeof(file) + sizeof(info);
595 file[ 2] = (unsigned char)(sizeAll );
596 file[ 3] = (unsigned char)(sizeAll >> 8);
597 file[ 4] = (unsigned char)(sizeAll >> 16);
598 file[ 5] = (unsigned char)(sizeAll >> 24);
600 info[ 4] = (unsigned char)(width );
601 info[ 5] = (unsigned char)(width >> 8);
602 info[ 6] = (unsigned char)(width >> 16);
603 info[ 7] = (unsigned char)(width >> 24);
605 int32_t negativeHeight = -(int32_t)height;
606 info[ 8] = (unsigned char)(negativeHeight );
607 info[ 9] = (unsigned char)(negativeHeight >> 8);
608 info[10] = (unsigned char)(negativeHeight >> 16);
609 info[11] = (unsigned char)(negativeHeight >> 24);
611 info[20] = (unsigned char)(sizeData );
612 info[21] = (unsigned char)(sizeData >> 8);
613 info[22] = (unsigned char)(sizeData >> 16);
614 info[23] = (unsigned char)(sizeData >> 24);
616 outFile.write(reinterpret_cast<char*>(file), sizeof(file));
617 outFile.write(reinterpret_cast<char*>(info), sizeof(info));
619 unsigned char pad[3] = {0, 0, 0};
621 for (size_t y = 0; y < height; y++) {
622 for (size_t x = 0; x < width; x++) {
623 unsigned char pixel[3];
624 pixel[0] = data[y*width*3 + x*3];
625 pixel[1] = data[y*width*3 + x*3 + 1];
626 pixel[2] = data[y*width*3 + x*3 + 2];
627 outFile.write(reinterpret_cast<char *>(pixel), 3);
629 outFile.write(reinterpret_cast<char *>(pad), padSize);
/**
 * @brief Measures how long a callable takes to execute
 * @param func - callable to run; it is invoked exactly once
 * @return elapsed time in milliseconds, measured with std::chrono::high_resolution_clock
 */
inline double getDurationOf(std::function<void()> func) {
    const auto t0 = std::chrono::high_resolution_clock::now();
    func();
    const auto t1 = std::chrono::high_resolution_clock::now();
    // Convert directly to double-precision milliseconds. The previous detour through
    // duration<float> seconds kept only about 7 significant digits of the measurement.
    return std::chrono::duration<double, std::milli>(t1 - t0).count();
}
643 static std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>>
644 perfCountersSorted(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap) {
645 using perfItem = std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>;
646 std::vector<perfItem> sorted;
647 for (auto &kvp : perfMap) sorted.push_back(kvp);
649 std::stable_sort(sorted.begin(), sorted.end(),
650 [](const perfItem& l, const perfItem& r) {
651 return l.second.execution_index < r.second.execution_index;
// Prints one line per layer (name, status, layer type, real time, cpu time, exec type), in the
// order returned by perfCountersSorted, followed by the sum of all positive real times.
// NOTE(review): the condition guarding the header, the rest of the name-truncation branch and the
// ends of the switch cases are not visible in this view; presumably the header is printed only
// when bshowHeader is true — confirm.
657 static UNUSED void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
658 std::ostream &stream,
659 bool bshowHeader = true) {
660 long long totalTime = 0;
661 // Print performance counts
663 stream << std::endl << "performance counts:" << std::endl << std::endl;
666 auto performanceMapSorted = perfCountersSorted(performanceMap);
668 for (const auto & it : performanceMapSorted) {
669 std::string toPrint(it.first);
670 const int maxLayerName = 30;
// Names that do not fit the column are cut to maxLayerName - 4 characters.
// NOTE(review): this compares an unsigned length with a signed int.
672 if (it.first.length() >= maxLayerName) {
673 toPrint = it.first.substr(0, maxLayerName - 4);
// Fixed-width, left-aligned columns.
678 stream << std::setw(maxLayerName) << std::left << toPrint;
679 switch (it.second.status) {
680 case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
681 stream << std::setw(15) << std::left << "EXECUTED";
683 case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
684 stream << std::setw(15) << std::left << "NOT_RUN";
686 case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
687 stream << std::setw(15) << std::left << "OPTIMIZED_OUT";
690 stream << std::setw(30) << std::left << "layerType: " + std::string(it.second.layer_type) + " ";
691 stream << std::setw(20) << std::left << "realTime: " + std::to_string(it.second.realTime_uSec);
692 stream << std::setw(20) << std::left << " cpu: " + std::to_string(it.second.cpu_uSec);
693 stream << " execType: " << it.second.exec_type << std::endl;
// Only positive times contribute to the total.
694 if (it.second.realTime_uSec > 0) {
695 totalTime += it.second.realTime_uSec;
698 stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl;
701 static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream &stream) {
702 auto performanceMap = request.GetPerformanceCounts();
703 printPerformanceCounts(performanceMap, stream);
709 static UNUSED void printPerformanceCountsPlugin(InferenceEngine::InferenceEnginePluginPtr plugin, std::ostream &stream) {
710 std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> performanceMap;
711 plugin->GetPerformanceCounts(performanceMap, nullptr);
712 printPerformanceCounts(performanceMap, stream);
716 * @brief This class represents an object that is found by an object detection net
// A detection: object type, bounding box (xmin, ymin, xmax, ymax), probability and a "difficult"
// flag. Provides an intersection-over-union score (ioU) and coordinate scaling (scale).
// NOTE(review): several member declarations, access specifiers, branch bodies and closing braces
// are not visible in this view.
718 class DetectedObject {
721 float xmin, xmax, ymin, ymax, prob;
// Note the argument order (xmin, ymin, xmax, ymax); it differs from the member declaration order.
724 DetectedObject(int _objectType, float _xmin, float _ymin, float _xmax, float _ymax, float _prob, bool _difficult = false)
725 : objectType(_objectType), xmin(_xmin), xmax(_xmax), ymin(_ymin), ymax(_ymax), prob(_prob), difficult(_difficult) {
728 DetectedObject(const DetectedObject& other) = default;
// Intersection-over-union of two boxes (see the final return: intersection / union).
730 static float ioU(const DetectedObject& detectedObject1_, const DetectedObject& detectedObject2_) {
731 // Add small space to eliminate empty squares
// epsilon is currently 0, so the two copies below have the same coordinates as the inputs.
732 float epsilon = 0; // 1e-5f;
734 DetectedObject detectedObject1(detectedObject1_.objectType,
735 (detectedObject1_.xmin - epsilon),
736 (detectedObject1_.ymin - epsilon),
737 (detectedObject1_.xmax- epsilon),
738 (detectedObject1_.ymax- epsilon), detectedObject1_.prob);
739 DetectedObject detectedObject2(detectedObject2_.objectType,
740 (detectedObject2_.xmin + epsilon),
741 (detectedObject2_.ymin + epsilon),
742 (detectedObject2_.xmax),
743 (detectedObject2_.ymax), detectedObject2_.prob);
745 if (detectedObject1.objectType != detectedObject2.objectType) {
746 // objects are different, so the result is 0
// Boxes whose max coordinate lies below their min coordinate score 0.
750 if (detectedObject1.xmax < detectedObject1.xmin) return 0.0;
751 if (detectedObject1.ymax < detectedObject1.ymin) return 0.0;
752 if (detectedObject2.xmax < detectedObject2.xmin) return 0.0;
753 if (detectedObject2.ymax < detectedObject2.ymin) return 0.0;
// Corners of the intersection rectangle.
756 float xmin = (std::max)(detectedObject1.xmin, detectedObject2.xmin);
757 float ymin = (std::max)(detectedObject1.ymin, detectedObject2.ymin);
758 float xmax = (std::min)(detectedObject1.xmax, detectedObject2.xmax);
759 float ymax = (std::min)(detectedObject1.ymax, detectedObject2.ymax);
761 // Caffe adds 1 to every length if the box isn't normalized. So do we...
// Coordinates above 1 are taken as a sign of a non-normalized box; `addendum` (declared on a line
// not visible here) presumably becomes 1 in that case — confirm.
763 if (xmax > 1 || ymax > 1)
// The intersection area is computed only when the boxes actually overlap.
770 if ((xmax >= xmin) && (ymax >= ymin)) {
771 intr = (addendum + xmax - xmin) * (addendum + ymax - ymin);
// Areas of the two boxes.
777 float square1 = (addendum + detectedObject1.xmax - detectedObject1.xmin) * (addendum + detectedObject1.ymax - detectedObject1.ymin);
778 float square2 = (addendum + detectedObject2.xmax - detectedObject2.xmin) * (addendum + detectedObject2.ymax - detectedObject2.ymin);
// Union = sum of the areas minus the part counted twice.
780 float unn = square1 + square2 - intr;
// NOTE(review): no guard against unn == 0 is visible here.
782 return static_cast<float>(intr) / unn;
// Returns a copy with x coordinates multiplied by scale_x and y coordinates by scale_y.
785 DetectedObject scale(float scale_x, float scale_y) const {
786 return DetectedObject(objectType, xmin * scale_x, ymin * scale_y, xmax * scale_x, ymax * scale_y, prob, difficult);
// The detections belonging to one image, plus a flag telling ioUMultiple whether probabilities
// should be taken into account.
// NOTE(review): access specifiers, the declarations of `score` and `coeff`, several branch bodies
// and the return of ioUMultiple are not visible in this view.
790 class ImageDescription {
792 const std::list<DetectedObject> alist;
793 const bool check_probs;
795 explicit ImageDescription(const std::list<DetectedObject> &_alist, bool _check_probs = false)
796 : alist(_alist), check_probs(_check_probs) {
// Scores how well two sets of objects match: every object of the smaller set is paired with the
// best-scoring remaining object of the bigger set, the pair scores are summed and the sum is
// divided by the size of the bigger set.
799 static float ioUMultiple(const ImageDescription &detectedObjects, const ImageDescription &desiredObjects) {
800 const ImageDescription *detectedObjectsSmall, *detectedObjectsBig;
// Only the flag of desiredObjects is consulted.
801 bool check_probs = desiredObjects.check_probs;
803 if (detectedObjects.alist.size() < desiredObjects.alist.size()) {
804 detectedObjectsSmall = &detectedObjects;
805 detectedObjectsBig = &desiredObjects;
807 detectedObjectsSmall = &desiredObjects;
808 detectedObjectsBig = &detectedObjects;
// Working copies; matched entries are removed from them as the loop proceeds.
811 std::list<DetectedObject> doS = detectedObjectsSmall->alist;
812 std::list<DetectedObject> doB = detectedObjectsBig->alist;
814 float fullScore = 0.0f;
815 while (doS.size() > 0) {
// Find the entry of the bigger list with the highest IoU against the head of the smaller list.
817 std::list<DetectedObject>::iterator bestJ = doB.end();
818 for (auto j = doB.begin(); j != doB.end(); j++) {
819 float curscore = DetectedObject::ioU(*doS.begin(), *j);
820 if (score < curscore) {
// Smaller and larger probability of the matched pair; presumably they feed `coeff` when
// check_probs is set — confirm.
828 if (bestJ != doB.end()) {
829 float mn = std::min((*bestJ).prob, (*doS.begin()).prob);
830 float mx = std::max((*bestJ).prob, (*doS.begin()).prob);
// A matched object is removed so that it cannot be paired twice.
837 if (bestJ != doB.end()) doB.erase(bestJ);
838 fullScore += coeff * score;
// NOTE(review): no guard against an empty bigger list is visible before this division.
840 fullScore /= detectedObjectsBig->alist.size();
// Returns a description whose objects are all scaled by (scale_x, scale_y).
846 ImageDescription scale(float scale_x, float scale_y) const {
847 std::list<DetectedObject> slist;
848 for (auto& dob : alist) {
849 slist.push_back(dob.scale(scale_x, scale_y));
851 return ImageDescription(slist, check_probs);
// Collects true/false-positive matches per object class over many images (consumeImage) and turns
// them into a per-class average precision (calculateAveragePrecisionPerClass).
// NOTE(review): many short lines (enum header, the `threshold` member, several declarations,
// branch bodies, closing braces and returns) are not visible in this view.
855 struct AveragePrecisionCalculator {
858 TruePositive, FalsePositive
862 * Here we count all TP and FP matches for all the classes in all the images.
// Keyed by object type; each entry pairs a detection probability with its match kind.
864 std::map<int, std::vector<std::pair<double, MatchKind>>> matches;
// Per object type: number of desired objects not flagged as difficult (see the end of consumeImage).
866 std::map<int, int> N;
// Comparator: higher probability first.
870 static bool SortBBoxDescend(const DetectedObject& bbox1, const DetectedObject& bbox2) {
871 return bbox1.prob > bbox2.prob;
// Comparator: higher first element (probability) first.
874 static bool SortPairDescend(const std::pair<double, MatchKind>& p1, const std::pair<double, MatchKind>& p2) {
875 return p1.first > p2.first;
879 explicit AveragePrecisionCalculator(double _threshold) : threshold(_threshold) { }
// Matches the detections of one image against the desired objects and records the outcome.
884 void consumeImage(const ImageDescription &detectedObjects, const ImageDescription &desiredObjects) {
885 // Collecting IoU values
// One flag per desired object, set once that object has been matched.
886 std::vector<bool> visited(desiredObjects.alist.size(), false);
// Detections are processed in order of decreasing probability.
887 std::vector<DetectedObject> bboxes{ std::begin(detectedObjects.alist), std::end(detectedObjects.alist) };
888 std::sort(bboxes.begin(), bboxes.end(), SortBBoxDescend);
891 for (auto&& detObj : bboxes) {
892 // Searching for the best match to this detection
893 // Searching for desired object
894 float overlap_max = -1;
896 auto desmax = desiredObjects.alist.end();
899 for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++, j++) {
900 double iou = DetectedObject::ioU(detObj, *desObj);
901 if (iou > overlap_max) {
902 overlap_max = static_cast<float>(iou);
// The best overlap has to reach the threshold to be considered a match at all.
909 if (overlap_max >= threshold) {
// Matches against objects flagged as difficult are not recorded by this branch.
910 if (!desmax->difficult) {
// Only the first match of a desired object marks it as visited.
911 if (!visited[jmax]) {
913 visited[jmax] = true;
917 matches[detObj.objectType].push_back(std::make_pair(detObj.prob, mk));
921 matches[detObj.objectType].push_back(std::make_pair(detObj.prob, mk));
// Count the non-difficult desired objects per class.
925 for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++) {
926 if (!desObj->difficult) {
927 N[desObj->objectType]++;
// Computes an average precision value per object type from the recorded matches.
932 std::map<int, double> calculateAveragePrecisionPerClass() const {
934 * Precision-to-TP curve per class (a variation of precision-to-recall curve without dividing into N)
936 std::map<int, std::map<int, double>> precisionToTP;
939 std::map<int, double> res;
// `m` is a copy of the map entry, so the sort below leaves the stored matches untouched.
941 for (auto m : matches) {
943 std::sort(m.second.begin(), m.second.end(), SortPairDescend);
948 std::vector<double> prec;
949 std::vector<double> rec;
951 for (auto mm : m.second) {
952 // Here we are descending in a probability value
953 MatchKind mk = mm.second;
954 if (mk == TruePositive) TP++;
955 else if (mk == FalsePositive) FP++;
957 double precision = static_cast<double>(TP) / (TP + FP);
// Recall is derived only for classes that have an entry in N.
959 if (N.find(clazz) != N.end()) {
960 recall = static_cast<double>(TP) / N.at(clazz);
963 prec.push_back(precision);
964 rec.push_back(recall);
967 int num = rec.size();
969 // 11point from Caffe
// max_precs[j] collects the highest precision seen among points whose recall is at least j / 10.
971 std::vector<float> max_precs(11, 0.);
972 int start_idx = num - 1;
973 for (int j = 10; j >= 0; --j) {
974 for (int i = start_idx; i >= 0; --i) {
975 if (rec[i] < j / 10.) {
// NOTE(review): for j == 0 this index would be -1; a guard may exist on a line that is not
// visible here — confirm.
978 max_precs[j-1] = max_precs[j];
982 if (max_precs[j] < prec[i]) {
983 max_precs[j] = static_cast<float>(prec[i]);
// The average precision is the mean of the 11 collected values.
988 for (int j = 10; j >= 0; --j) {
989 ap += max_precs[j] / 11;
999 * @brief Adds colored rectangles to the image
1000 * @param data - data where rectangles are put
1001 * @param height - height of the image stored in data
1002 * @param width - width of the image stored in data
1003 * @param detectedObjects - vector of detected objects
// Draws a one-pixel outline for every detected object into a 3-bytes-per-pixel image buffer.
// Box coordinates are multiplied by the image width / height before use, so they are presumably
// expected as fractions of the image size — confirm against callers.
// NOTE(review): the colour table entries and the closing braces are not visible in this view.
// NOTE(review): no clamping is visible here; a coordinate outside [0, 1), e.g. ymax == 1.0, would
// address memory beyond the height * width * 3 bytes of the buffer.
1005 static UNUSED void addRectangles(unsigned char *data, size_t height, size_t width, std::vector<DetectedObject> detectedObjects) {
1006 std::vector<Color> colors = {
1030 for (size_t i = 0; i < detectedObjects.size(); i++) {
// The object type is wrapped around the table size so that any type maps to some color.
1031 int cls = detectedObjects[i].objectType % colors.size();
// Convert the box to pixel coordinates (truncating toward zero).
1033 int xmin = static_cast<int>(detectedObjects[i].xmin * width);
1034 int xmax = static_cast<int>(detectedObjects[i].xmax * width);
1035 int ymin = static_cast<int>(detectedObjects[i].ymin * height);
1036 int ymax = static_cast<int>(detectedObjects[i].ymax * height);
// Horizontal edges: rows ymin and ymax, columns xmin .. xmax - 1.
1038 size_t shift_first = ymin*width * 3;
1039 size_t shift_second = ymax*width * 3;
1040 for (int x = xmin; x < xmax; x++) {
1041 data[shift_first + x * 3] = colors.at(cls).red();
1042 data[shift_first + x * 3 + 1] = colors.at(cls).green();
1043 data[shift_first + x * 3 + 2] = colors.at(cls).blue();
1044 data[shift_second + x * 3] = colors.at(cls).red();
1045 data[shift_second + x * 3 + 1] = colors.at(cls).green();
1046 data[shift_second + x * 3 + 2] = colors.at(cls).blue();
// Vertical edges: columns xmin and xmax, rows ymin .. ymax - 1.
1049 shift_first = xmin * 3;
1050 shift_second = xmax * 3;
1051 for (int y = ymin; y < ymax; y++) {
1052 data[shift_first + y*width * 3] = colors.at(cls).red();
1053 data[shift_first + y*width * 3 + 1] = colors.at(cls).green();
1054 data[shift_first + y*width * 3 + 2] = colors.at(cls).blue();
1055 data[shift_second + y*width * 3] = colors.at(cls).red();
1056 data[shift_second + y*width * 3 + 1] = colors.at(cls).green();
1057 data[shift_second + y*width * 3 + 2] = colors.at(cls).blue();