cf70ac97de4c05106190397565325a1260d7d4c7
[platform/core/api/mediavision.git] / mv_machine_learning / object_detection / src / mobilenet_v1_ssd.cpp
1 /**
2  * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <string.h>
18 #include <map>
19 #include <algorithm>
20
21 #include "machine_learning_exception.h"
22 #include "mv_object_detection_config.h"
23 #include "mobilenet_v1_ssd.h"
24 #include "Postprocess.h"
25
26 using namespace std;
27 using namespace mediavision::inference;
28 using namespace mediavision::machine_learning::exception;
29
30 namespace mediavision
31 {
32 namespace machine_learning
33 {
34 MobilenetV1Ssd::MobilenetV1Ssd(ObjectDetectionTaskType task_type, std::shared_ptr<MachineLearningConfig> config)
35                 : ObjectDetection(task_type, config), _result()
36 {}
37
38 MobilenetV1Ssd::~MobilenetV1Ssd()
39 {}
40
41 ObjectDetectionResult &MobilenetV1Ssd::result()
42 {
43         // Clear _result object because result() function can be called every time user wants
44         // so make sure to clear existing result data before getting the data again.
45         memset(reinterpret_cast<void *>(&_result), 0, sizeof(_result));
46
47         vector<string> names;
48
49         ObjectDetection::getOutputNames(names);
50
51         vector<float> number_tensor;
52
53         // TFLite_Detection_PostProcess:3
54         ObjectDetection::getOutputTensor(names[3], number_tensor);
55
56         vector<float> label_tensor;
57
58         // TFLite_Detection_PostProcess:1
59         ObjectDetection::getOutputTensor(names[1], label_tensor);
60
61         vector<float> score_tensor;
62         map<float, unsigned int, std::greater<float> > sorted_score;
63
64         auto scoreMetaInfo = _config->getOutputMetaMap().at(names[2]);
65         auto decodingScore = static_pointer_cast<DecodingScore>(scoreMetaInfo->decodingTypeMap[DecodingType::SCORE]);
66
67         // TFLite_Detection_PostProcess:2
68         ObjectDetection::getOutputTensor(names[2], score_tensor);
69         for (size_t idx = 0; idx < score_tensor.size(); ++idx) {
70                 if (decodingScore->threshold > score_tensor[idx])
71                         continue;
72
73                 sorted_score[score_tensor[idx]] = idx;
74         }
75
76         auto boxMetaInfo = _config->getOutputMetaMap().at(names[0]);
77         auto decodingBox = static_pointer_cast<DecodingBox>(boxMetaInfo->decodingTypeMap[DecodingType::BOX]);
78         vector<float> box_tensor;
79
80         ObjectDetection::getOutputTensor(names[0], box_tensor);
81
82         for (auto &score : sorted_score) {
83                 _result.number_of_objects++;
84                 // second is idx
85                 _result.names.push_back(_labels[label_tensor[score.second]]);
86                 _result.indices.push_back(_result.number_of_objects - 1);
87                 _result.confidences.push_back(score.first);
88
89                 vector<unsigned int> &order = decodingBox->order;
90
91                 _result.left.push_back(
92                                 static_cast<int>(box_tensor[score.second * 4 + order[0]] * _preprocess.getImageWidth()[0]));
93                 _result.top.push_back(
94                                 static_cast<int>(box_tensor[score.second * 4 + order[1]] * _preprocess.getImageHeight()[0]));
95                 _result.right.push_back(
96                                 static_cast<int>(box_tensor[score.second * 4 + order[2]] * _preprocess.getImageWidth()[0]));
97                 _result.bottom.push_back(
98                                 static_cast<int>(box_tensor[score.second * 4 + order[3]] * _preprocess.getImageHeight()[0]));
99
100                 LOGI("idx = %d, name = %s, score = %f, %dx%d, %dx%d", score.second,
101                          _result.names[_result.number_of_objects - 1].c_str(), _result.confidences[_result.number_of_objects - 1],
102                          _result.left[_result.number_of_objects - 1], _result.top[_result.number_of_objects - 1],
103                          _result.right[_result.number_of_objects - 1], _result.bottom[_result.number_of_objects - 1]);
104
105                 if (decodingScore->topNumber == _result.number_of_objects)
106                         break;
107         }
108
109         return _result;
110 }
111
112 }
113 }