mv_machine_learning/object_detection/src/mobilenet_v1_ssd.cpp

   1 /**
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <string.h>
  18 #include <map>
  19 #include <algorithm>
  20
  21 #include "machine_learning_exception.h"
  22 #include "mv_object_detection_config.h"
  23 #include "mobilenet_v1_ssd.h"
  24 #include "Postprocess.h"
  25
  26 using namespace std;
  27 using namespace mediavision::inference;
  28 using namespace mediavision::machine_learning::exception;
  29
  30 namespace mediavision
  31 {
  32 namespace machine_learning
  33 {
  34 MobilenetV1Ssd::MobilenetV1Ssd(ObjectDetectionTaskType task_type, std::shared_ptr<MachineLearningConfig> config)
  35                 : ObjectDetection(task_type, config), _result()
  36 {}
  37
  38 MobilenetV1Ssd::~MobilenetV1Ssd()
  39 {}
  40
  41 ObjectDetectionResult &MobilenetV1Ssd::result()
  42 {
  43         // Clear _result object because result() function can be called every time user wants
  44         // so make sure to clear existing result data before getting the data again.
  45         memset(reinterpret_cast<void *>(&_result), 0, sizeof(_result));
  46
  47         vector<string> names;
  48
  49         ObjectDetection::getOutputNames(names);
  50
  51         vector<float> number_tensor;
  52
  53         // TFLite_Detection_PostProcess:3
  54         ObjectDetection::getOutputTensor(names[3], number_tensor);
  55
  56         vector<float> label_tensor;
  57
  58         // TFLite_Detection_PostProcess:1
  59         ObjectDetection::getOutputTensor(names[1], label_tensor);
  60
  61         vector<float> score_tensor;
  62         map<float, unsigned int, std::greater<float> > sorted_score;
  63
  64         auto scoreMetaInfo = _config->getOutputMetaMap().at(names[2]);
  65         auto decodingScore = static_pointer_cast<DecodingScore>(scoreMetaInfo->decodingTypeMap[DecodingType::SCORE]);
  66
  67         // TFLite_Detection_PostProcess:2
  68         ObjectDetection::getOutputTensor(names[2], score_tensor);
  69         for (size_t idx = 0; idx < score_tensor.size(); ++idx) {
  70                 if (decodingScore->threshold > score_tensor[idx])
  71                         continue;
  72
  73                 sorted_score[score_tensor[idx]] = idx;
  74         }
  75
  76         auto boxMetaInfo = _config->getOutputMetaMap().at(names[0]);
  77         auto decodingBox = static_pointer_cast<DecodingBox>(boxMetaInfo->decodingTypeMap[DecodingType::BOX]);
  78         vector<float> box_tensor;
  79
  80         ObjectDetection::getOutputTensor(names[0], box_tensor);
  81
  82         for (auto &score : sorted_score) {
  83                 _result.number_of_objects++;
  84                 // second is idx
  85                 _result.names.push_back(_labels[label_tensor[score.second]]);
  86                 _result.indices.push_back(_result.number_of_objects - 1);
  87                 _result.confidences.push_back(score.first);
  88
  89                 vector<unsigned int> &order = decodingBox->order;
  90
  91                 _result.left.push_back(
  92                                 static_cast<int>(box_tensor[score.second * 4 + order[0]] * _preprocess.getImageWidth()[0]));
  93                 _result.top.push_back(
  94                                 static_cast<int>(box_tensor[score.second * 4 + order[1]] * _preprocess.getImageHeight()[0]));
  95                 _result.right.push_back(
  96                                 static_cast<int>(box_tensor[score.second * 4 + order[2]] * _preprocess.getImageWidth()[0]));
  97                 _result.bottom.push_back(
  98                                 static_cast<int>(box_tensor[score.second * 4 + order[3]] * _preprocess.getImageHeight()[0]));
  99
 100                 LOGI("idx = %d, name = %s, score = %f, %dx%d, %dx%d", score.second,
 101                          _result.names[_result.number_of_objects - 1].c_str(), _result.confidences[_result.number_of_objects - 1],
 102                          _result.left[_result.number_of_objects - 1], _result.top[_result.number_of_objects - 1],
 103                          _result.right[_result.number_of_objects - 1], _result.bottom[_result.number_of_objects - 1]);
 104
 105                 if (decodingScore->topNumber == _result.number_of_objects)
 106                         break;
 107         }
 108
 109         return _result;
 110 }
 111
 112 }
 113 }