2 * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "mv_private.h"
25 #include "OutputMetadata.h"
28 using namespace mediavision::inference::box;
34 int OutputMetadata::ParseScore(JsonObject *root, LayerInfo &layer)
36 if (!json_object_has_member(root, "score")) {
37 LOGI("No score outputmetadata");
38 return MEDIA_VISION_ERROR_NONE;
41 int ret = score.ParseScore(root);
42 if (ret != MEDIA_VISION_ERROR_NONE) {
43 LOGE("Fail to parse score.");
47 layer.decodingType |= POSTPROCESS_DECODING_TYPE_SCORE;
51 int OutputMetadata::ParseBox(JsonObject *root, LayerInfo &layer)
53 if (!json_object_has_member(root, "box")) {
54 LOGI("No box outputmetadata");
55 return MEDIA_VISION_ERROR_NONE;
58 int ret = box.ParseBox(root);
59 if (ret != MEDIA_VISION_ERROR_NONE) {
60 LOGE("Fail to parse box.");
64 layer.decodingType |= POSTPROCESS_DECODING_TYPE_BOX;
// Parses the optional "landmark" member of `root` for one output layer.
// root:  JSON object that may contain a "landmark" member.
// layer: layer description; POSTPROCESS_DECODING_TYPE_LANDMARK is OR-ed into
//        layer.decodingType after the landmark section has been handed to the parser.
// Returns MEDIA_VISION_ERROR_NONE both when the member is absent and after parsing.
68 int OutputMetadata::ParseLandmark(JsonObject *root, LayerInfo &layer)
// A missing section is only logged; it is not treated as an error.
72 if (!json_object_has_member(root, "landmark")) {
73 LOGI("No landmark outputmetadata");
75 return MEDIA_VISION_ERROR_NONE;
// NOTE(review): any result of landmark.ParseLandmark() is not inspected here, whereas
// ParseScore()/ParseBox() propagate their parser's error code - confirm whether it can fail.
78 landmark.ParseLandmark(root);
79 layer.decodingType |= POSTPROCESS_DECODING_TYPE_LANDMARK;
82 return MEDIA_VISION_ERROR_NONE;
// Parses the "offset" member of `root` for one output layer.
// root:  JSON object expected to contain an "offset" member.
// layer: layer description; POSTPROCESS_DECODING_TYPE_OFFSETVEC is OR-ed into
//        layer.decodingType after the offset section has been handed to the parser.
// Returns MEDIA_VISION_ERROR_INVALID_OPERATION when the member is absent,
// MEDIA_VISION_ERROR_NONE otherwise.
85 int OutputMetadata::ParseOffset(JsonObject *root, LayerInfo &layer)
// NOTE(review): unlike ParseScore()/ParseBox()/ParseLandmark(), a missing member is
// reported as an error here although it is logged at info level - confirm this is intended.
89 if (!json_object_has_member(root, "offset")) {
90 LOGI("No offset outputmetadata");
92 return MEDIA_VISION_ERROR_INVALID_OPERATION;
// The parser also receives mSupportedShapeType.
// NOTE(review): any result of offsetVec.ParseOffset() is not inspected - confirm whether it can fail.
95 offsetVec.ParseOffset(root, mSupportedShapeType);
96 layer.decodingType |= POSTPROCESS_DECODING_TYPE_OFFSETVEC;
99 return MEDIA_VISION_ERROR_NONE;
// Reads the "postprocess" member of `root` and parses every decoding section found in it:
// "score", "box", "label", "number", "offset", "landmark" and "displacement" (with "edgemap").
// root:  tensor JSON object holding the "postprocess" member.
// layer: layer description; each parsed section ORs its POSTPROCESS_DECODING_TYPE_* value
//        into layer.decodingType (directly here, or inside the ParseXxx() helper it calls).
// Every failing step is logged together with its error code.
// The final statement returns MEDIA_VISION_ERROR_NONE.
102 int OutputMetadata::GetPostProcess(JsonObject *root, LayerInfo &layer)
// NOTE(review): the member is fetched without checking that it exists or is an object;
// Parse() checks json_object_has_member(..., "postprocess") before calling this method.
106 JsonNode *node = json_object_get_member(root, "postprocess");
107 JsonObject *object = json_node_get_object(node);
// Score section.
109 if (json_object_has_member(object, "score")) {
110 int ret = ParseScore(object, layer);
111 if (ret != MEDIA_VISION_ERROR_NONE) {
112 LOGE("Fail to GetScore[%d]", ret);
// Box section.
117 if (json_object_has_member(object, "box")) {
118 int ret = ParseBox(object, layer);
119 if (ret != MEDIA_VISION_ERROR_NONE) {
120 LOGE("Fail to GetBox[%d]", ret);
124 // additional parsing is required according to decoding type
// Any box decoding type other than BYPASS needs its decode info parsed as well.
125 if (box.GetDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) {
126 int ret = box.ParseDecodeInfo(object);
127 if (ret != MEDIA_VISION_ERROR_NONE) {
128 LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
// SSD anchor decoding additionally generates anchors from the parsed decode info.
132 if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
133 ret = box.GetDecodeInfo().GenerateAnchor();
134 if (ret != MEDIA_VISION_ERROR_NONE) {
135 LOGE("Fail to GenerateAnchor[%d]", ret);
// Label section.
141 if (json_object_has_member(object, "label")) {
142 int ret = label.Parse(object);
143 if (ret != MEDIA_VISION_ERROR_NONE) {
144 LOGE("Fail to GetLabel[%d]", ret);
148 layer.decodingType |= POSTPROCESS_DECODING_TYPE_LABEL;
// Number section.
151 if (json_object_has_member(object, "number")) {
152 int ret = number.Parse(object);
153 if (ret != MEDIA_VISION_ERROR_NONE) {
154 LOGE("Fail to GetNumber[%d]", ret);
158 layer.decodingType |= POSTPROCESS_DECODING_TYPE_NUMBER;
// Offset section.
161 if (json_object_has_member(object, "offset")) {
162 int ret = ParseOffset(object, layer);
163 if (ret != MEDIA_VISION_ERROR_NONE) {
164 LOGE("Fail to GetOffsetVector[%d]", ret);
// Landmark section.
169 if (json_object_has_member(object, "landmark")) {
170 int ret = ParseLandmark(object, layer);
171 if (ret != MEDIA_VISION_ERROR_NONE) {
172 LOGE("Fail to GetLandmark[%d]", ret);
// Heatmap-based landmark decoding types need their decode info parsed as well.
176 if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
177 landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
178 ret = landmark.ParseDecodeInfo(object, mSupportedShapeType);
179 if (ret != MEDIA_VISION_ERROR_NONE) {
180 LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
// Displacement section; it is parsed by the landmark object.
186 if (json_object_has_member(object, "displacement")) {
187 int ret = landmark.ParseDisplacement(object, mSupportedShapeType);
188 if (ret != MEDIA_VISION_ERROR_NONE) {
189 LOGE("Fail to GetDispVector[%d]", ret);
193 layer.decodingType |= POSTPROCESS_DECODING_TYPE_DISPLACEMENT;
195 // edgemap node is needed by posenet model(multi pose model) which has "displacement" node and
196 // decoding type of the multi pose model is INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE.
197 if (json_object_has_member(object, "edgemap")) {
198 ret = landmark.ParseEdgeMap(object);
199 if (ret != MEDIA_VISION_ERROR_NONE) {
200 LOGE("Fail to GetEdgeConnection[%d]", ret);
210 return MEDIA_VISION_ERROR_NONE;
// Parses the array of output tensor descriptions stored under `key_name` in `root`.
// root:     JSON object holding the output metadata.
// key_name: name of the array member listing the output tensors.
// For the element at 0-based index idx, the element must be keyed "tensor<idx + 1>".
// Its "name" string is read, its "postprocess" member (if any) is parsed with
// GetPostProcess(), and the layer is inserted into _tensor_info under that name.
// Returns MEDIA_VISION_ERROR_INVALID_OPERATION when an element is not keyed with the
// expected `tensorN` name or lacks that member; the final statement returns
// MEDIA_VISION_ERROR_NONE.
213 int OutputMetadata::Parse(JsonObject *root, std::string key_name)
// NOTE(review): `key_name` is not checked for existence in `root` before the array is used.
217 JsonArray *outputList = json_object_get_array_member(root, key_name.c_str());
219 LOGI("output tensor count = %d", json_array_get_length(outputList));
// NOTE(review): `auto idx = 0` deduces int while json_array_get_length() is documented to
// return an unsigned guint, so the loop condition compares signed with unsigned.
221 for (auto idx = 0; idx < json_array_get_length(outputList); ++idx) {
222 JsonNode *output_node = json_array_get_element(outputList, idx);
// The element key is recovered textually: serialize the element, keep everything before
// the first ':' and strip the delimiter characters listed below.
// NOTE(review): json-glib documents the json_to_string() result as caller-owned (to be
// released with g_free); the pointer is not kept here, so it looks leaked - confirm.
223 std::string token(json_to_string(output_node, 1));
// NOTE(review): find() returns std::string::npos when no ':' exists; it is narrowed to int here.
224 int pos = token.find(":");
225 std::string tensor_name = token.substr(0, pos);
226 const std::vector<char> delimiters = { '{', ' ', ':', '\n', '\"' };
228 for (auto &delimiter : delimiters)
229 tensor_name.erase(std::remove(tensor_name.begin(), tensor_name.end(), delimiter), tensor_name.end());
// Elements must appear in order and be keyed tensor1, tensor2, ...
231 if (tensor_name.compare((std::string("tensor") + std::to_string(idx + 1))) != 0) {
232 LOGE("Invalid tensor element. A tensor element form should be `tensorN`.");
233 return MEDIA_VISION_ERROR_INVALID_OPERATION;
236 LOGI("Parse tensor name : %s", tensor_name.c_str());
238 JsonObject *output_object = json_node_get_object(output_node);
240 if (!json_object_has_member(output_object, tensor_name.c_str())) {
241 LOGE("No tensor member.");
242 return MEDIA_VISION_ERROR_INVALID_OPERATION;
245 JsonNode *tensor_node = json_object_get_member(output_object, tensor_name.c_str());
246 JsonObject *tensor_object = json_node_get_object(tensor_node);
// NOTE(review): presence of "name" is not checked; a null result would be passed to the
// std::string constructor - confirm the metadata schema guarantees this member.
247 std::string name_value = json_object_get_string_member(tensor_object, "name");
249 LOGI("name = %s", name_value.c_str());
251 // TODO. add tensor information here.
// Post-processing information is optional for a tensor.
257 if (json_object_has_member(tensor_object, "postprocess")) {
258 int ret = GetPostProcess(tensor_object, layer);
259 if (ret != MEDIA_VISION_ERROR_NONE) {
260 LOGE("Fail to GetPostProcess.");
// Register the layer under the tensor's "name" value.
265 _tensor_info.insert(std::make_pair(name_value, layer));
272 return MEDIA_VISION_ERROR_NONE;