46db01e2d5335e42d0923b2d03d1e34ecd621d48
[platform/core/api/mediavision.git] / mv_machine_learning / inference / src / OutputMetadata.cpp
1 /**
2  * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "mv_private.h"
18
19 #include <unistd.h>
20 #include <fstream>
21 #include <string>
22 #include <queue>
23 #include <algorithm>
24
25 #include "OutputMetadata.h"
26 #include "Utils.h"
27
28 using namespace mediavision::inference::box;
29
30 namespace mediavision
31 {
32 namespace inference
33 {
34 int OutputMetadata::ParseScore(JsonObject *root, LayerInfo &layer)
35 {
36         if (!json_object_has_member(root, "score")) {
37                 LOGI("No score outputmetadata");
38                 return MEDIA_VISION_ERROR_NONE;
39         }
40
41         int ret = score.ParseScore(root);
42         if (ret != MEDIA_VISION_ERROR_NONE) {
43                 LOGE("Fail to parse score.");
44                 return ret;
45         }
46
47         layer.decodingType |= POSTPROCESS_DECODING_TYPE_SCORE;
48         return ret;
49 }
50
51 int OutputMetadata::ParseBox(JsonObject *root, LayerInfo &layer)
52 {
53         if (!json_object_has_member(root, "box")) {
54                 LOGI("No box outputmetadata");
55                 return MEDIA_VISION_ERROR_NONE;
56         }
57
58         int ret = box.ParseBox(root);
59         if (ret != MEDIA_VISION_ERROR_NONE) {
60                 LOGE("Fail to parse box.");
61                 return ret;
62         }
63
64         layer.decodingType |= POSTPROCESS_DECODING_TYPE_BOX;
65         return ret;
66 }
67
68 int OutputMetadata::ParseLandmark(JsonObject *root, LayerInfo &layer)
69 {
70         LOGI("ENTER");
71
72         if (!json_object_has_member(root, "landmark")) {
73                 LOGI("No landmark outputmetadata");
74                 LOGI("LEAVE");
75                 return MEDIA_VISION_ERROR_NONE;
76         }
77
78         landmark.ParseLandmark(root);
79         layer.decodingType |= POSTPROCESS_DECODING_TYPE_LANDMARK;
80
81         LOGI("LEAVE");
82         return MEDIA_VISION_ERROR_NONE;
83 }
84
85 int OutputMetadata::ParseOffset(JsonObject *root, LayerInfo &layer)
86 {
87         LOGI("ENTER");
88
89         if (!json_object_has_member(root, "offset")) {
90                 LOGI("No offset outputmetadata");
91                 LOGI("LEAVE");
92                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
93         }
94
95         offsetVec.ParseOffset(root, mSupportedShapeType);
96         layer.decodingType |= POSTPROCESS_DECODING_TYPE_OFFSETVEC;
97
98         LOGI("LEAVE");
99         return MEDIA_VISION_ERROR_NONE;
100 }
101
102 int OutputMetadata::GetPostProcess(JsonObject *root, LayerInfo &layer)
103 {
104         LOGI("ENTER");
105
106         JsonNode *node = json_object_get_member(root, "postprocess");
107         JsonObject *object = json_node_get_object(node);
108
109         if (json_object_has_member(object, "score")) {
110                 int ret = ParseScore(object, layer);
111                 if (ret != MEDIA_VISION_ERROR_NONE) {
112                         LOGE("Fail to GetScore[%d]", ret);
113                         return ret;
114                 }
115         }
116
117         if (json_object_has_member(object, "box")) {
118                 int ret = ParseBox(object, layer);
119                 if (ret != MEDIA_VISION_ERROR_NONE) {
120                         LOGE("Fail to GetBox[%d]", ret);
121                         return ret;
122                 }
123
124                 // addtional parsing is required according to decoding type
125                 if (box.GetDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) {
126                         int ret = box.ParseDecodeInfo(object);
127                         if (ret != MEDIA_VISION_ERROR_NONE) {
128                                 LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
129                                 return ret;
130                         }
131                 }
132                 if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
133                         ret = box.GetDecodeInfo().GenerateAnchor();
134                         if (ret != MEDIA_VISION_ERROR_NONE) {
135                                 LOGE("Fail to GenerateAnchor[%d]", ret);
136                                 return ret;
137                         }
138                 }
139         }
140
141         if (json_object_has_member(object, "label")) {
142                 int ret = label.Parse(object);
143                 if (ret != MEDIA_VISION_ERROR_NONE) {
144                         LOGE("Fail to GetLabel[%d]", ret);
145                         return ret;
146                 }
147
148                 layer.decodingType |= POSTPROCESS_DECODING_TYPE_LABEL;
149         }
150
151         if (json_object_has_member(object, "number")) {
152                 int ret = number.Parse(object);
153                 if (ret != MEDIA_VISION_ERROR_NONE) {
154                         LOGE("Fail to GetNumber[%d]", ret);
155                         return ret;
156                 }
157
158                 layer.decodingType |= POSTPROCESS_DECODING_TYPE_NUMBER;
159         }
160
161         if (json_object_has_member(object, "offset")) {
162                 int ret = ParseOffset(object, layer);
163                 if (ret != MEDIA_VISION_ERROR_NONE) {
164                         LOGE("Fail to GetOffsetVector[%d]", ret);
165                         return ret;
166                 }
167         }
168
169         if (json_object_has_member(object, "landmark")) {
170                 int ret = ParseLandmark(object, layer);
171                 if (ret != MEDIA_VISION_ERROR_NONE) {
172                         LOGE("Fail to GetLandmark[%d]", ret);
173                         return ret;
174                 }
175
176                 if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
177                         landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
178                         ret = landmark.ParseDecodeInfo(object, mSupportedShapeType);
179                         if (ret != MEDIA_VISION_ERROR_NONE) {
180                                 LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
181                                 return ret;
182                         }
183                 }
184         }
185
186         if (json_object_has_member(object, "displacement")) {
187                 int ret = landmark.ParseDisplacement(object, mSupportedShapeType);
188                 if (ret != MEDIA_VISION_ERROR_NONE) {
189                         LOGE("Fail to GetDispVector[%d]", ret);
190                         return ret;
191                 }
192
193                 layer.decodingType |= POSTPROCESS_DECODING_TYPE_DISPLACEMENT;
194
195                 // edgemap node is needed by posenet model(multi pose model) which has "displayment" node and
196                 // decoding type of the multi pose model is INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE.
197                 if (json_object_has_member(object, "edgemap")) {
198                         ret = landmark.ParseEdgeMap(object);
199                         if (ret != MEDIA_VISION_ERROR_NONE) {
200                                 LOGE("Fail to GetEdgeConnection[%d]", ret);
201                                 return ret;
202                         }
203                 }
204         }
205
206         parsed = true;
207
208         LOGI("LEAVE");
209
210         return MEDIA_VISION_ERROR_NONE;
211 }
212
213 int OutputMetadata::Parse(JsonObject *root, std::string key_name)
214 {
215         LOGI("ENTER");
216
217         JsonArray *outputList = json_object_get_array_member(root, key_name.c_str());
218
219         LOGI("output tensor count = %d", json_array_get_length(outputList));
220
221         for (auto idx = 0; idx < json_array_get_length(outputList); ++idx) {
222                 JsonNode *output_node = json_array_get_element(outputList, idx);
223                 std::string token(json_to_string(output_node, 1));
224                 int pos = token.find(":");
225                 std::string tensor_name = token.substr(0, pos);
226                 const std::vector<char> delimiters = { '{', ' ', ':', '\n', '\"' };
227
228                 for (auto &delimiter : delimiters)
229                         tensor_name.erase(std::remove(tensor_name.begin(), tensor_name.end(), delimiter), tensor_name.end());
230
231                 if (tensor_name.compare((std::string("tensor") + std::to_string(idx + 1))) != 0) {
232                         LOGE("Invalid tensor element. A tensor element form should be `tensorN`.");
233                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
234                 }
235
236                 LOGI("Parse tensor name : %s", tensor_name.c_str());
237
238                 JsonObject *output_object = json_node_get_object(output_node);
239
240                 if (!json_object_has_member(output_object, tensor_name.c_str())) {
241                         LOGE("No tensor member.");
242                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
243                 }
244
245                 JsonNode *tensor_node = json_object_get_member(output_object, tensor_name.c_str());
246                 JsonObject *tensor_object = json_node_get_object(tensor_node);
247                 std::string name_value = json_object_get_string_member(tensor_object, "name");
248
249                 LOGI("name = %s", name_value.c_str());
250
251                 // TODO. add tensor information here.
252
253                 LayerInfo layer = {
254                         name_value,
255                 };
256
257                 if (json_object_has_member(tensor_object, "postprocess")) {
258                         int ret = GetPostProcess(tensor_object, layer);
259                         if (ret != MEDIA_VISION_ERROR_NONE) {
260                                 LOGE("Fail to GetPostProcess.");
261                                 return ret;
262                         }
263                 }
264
265                 _tensor_info.insert(std::make_pair(name_value, layer));
266         }
267
268         parsed = true;
269
270         LOGI("LEAVE");
271
272         return MEDIA_VISION_ERROR_NONE;
273 }
274 } /* Inference */
275 } /* MediaVision */