mv_machine_learning/inference/src/OutputMetadata.cpp

   1 /**
   2  * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "mv_private.h"
  18
  19 #include <unistd.h>
  20 #include <fstream>
  21 #include <string>
  22 #include <queue>
  23 #include <algorithm>
  24
  25 #include "OutputMetadata.h"
  26 #include "Utils.h"
  27
  28 using namespace mediavision::inference::box;
  29
  30 namespace mediavision
  31 {
  32 namespace inference
  33 {
  34 int OutputMetadata::ParseScore(JsonObject *root, LayerInfo &layer)
  35 {
  36         if (!json_object_has_member(root, "score")) {
  37                 LOGI("No score outputmetadata");
  38                 return MEDIA_VISION_ERROR_NONE;
  39         }
  40
  41         int ret = score.ParseScore(root);
  42         if (ret != MEDIA_VISION_ERROR_NONE) {
  43                 LOGE("Fail to parse score.");
  44                 return ret;
  45         }
  46
  47         layer.decodingType |= POSTPROCESS_DECODING_TYPE_SCORE;
  48         return ret;
  49 }
  50
  51 int OutputMetadata::ParseBox(JsonObject *root, LayerInfo &layer)
  52 {
  53         if (!json_object_has_member(root, "box")) {
  54                 LOGI("No box outputmetadata");
  55                 return MEDIA_VISION_ERROR_NONE;
  56         }
  57
  58         int ret = box.ParseBox(root);
  59         if (ret != MEDIA_VISION_ERROR_NONE) {
  60                 LOGE("Fail to parse box.");
  61                 return ret;
  62         }
  63
  64         layer.decodingType |= POSTPROCESS_DECODING_TYPE_BOX;
  65         return ret;
  66 }
  67
  68 int OutputMetadata::ParseLandmark(JsonObject *root, LayerInfo &layer)
  69 {
  70         LOGI("ENTER");
  71
  72         if (!json_object_has_member(root, "landmark")) {
  73                 LOGI("No landmark outputmetadata");
  74                 LOGI("LEAVE");
  75                 return MEDIA_VISION_ERROR_NONE;
  76         }
  77
  78         landmark.ParseLandmark(root);
  79         layer.decodingType |= POSTPROCESS_DECODING_TYPE_LANDMARK;
  80
  81         LOGI("LEAVE");
  82         return MEDIA_VISION_ERROR_NONE;
  83 }
  84
  85 int OutputMetadata::ParseOffset(JsonObject *root, LayerInfo &layer)
  86 {
  87         LOGI("ENTER");
  88
  89         if (!json_object_has_member(root, "offset")) {
  90                 LOGI("No offset outputmetadata");
  91                 LOGI("LEAVE");
  92                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
  93         }
  94
  95         offsetVec.ParseOffset(root, mSupportedShapeType);
  96         layer.decodingType |= POSTPROCESS_DECODING_TYPE_OFFSETVEC;
  97
  98         LOGI("LEAVE");
  99         return MEDIA_VISION_ERROR_NONE;
 100 }
 101
 102 int OutputMetadata::GetPostProcess(JsonObject *root, LayerInfo &layer)
 103 {
 104         LOGI("ENTER");
 105
 106         JsonNode *node = json_object_get_member(root, "postprocess");
 107         JsonObject *object = json_node_get_object(node);
 108
 109         if (json_object_has_member(object, "score")) {
 110                 int ret = ParseScore(object, layer);
 111                 if (ret != MEDIA_VISION_ERROR_NONE) {
 112                         LOGE("Fail to GetScore[%d]", ret);
 113                         return ret;
 114                 }
 115         }
 116
 117         if (json_object_has_member(object, "box")) {
 118                 int ret = ParseBox(object, layer);
 119                 if (ret != MEDIA_VISION_ERROR_NONE) {
 120                         LOGE("Fail to GetBox[%d]", ret);
 121                         return ret;
 122                 }
 123
 124                 // addtional parsing is required according to decoding type
 125                 if (box.GetDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) {
 126                         int ret = box.ParseDecodeInfo(object);
 127                         if (ret != MEDIA_VISION_ERROR_NONE) {
 128                                 LOGE("Fail to GetBoxDecodeInfo[%d]", ret);
 129                                 return ret;
 130                         }
 131                 }
 132                 if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
 133                         ret = box.GetDecodeInfo().GenerateAnchor();
 134                         if (ret != MEDIA_VISION_ERROR_NONE) {
 135                                 LOGE("Fail to GenerateAnchor[%d]", ret);
 136                                 return ret;
 137                         }
 138                 }
 139         }
 140
 141         if (json_object_has_member(object, "label")) {
 142                 int ret = label.Parse(object);
 143                 if (ret != MEDIA_VISION_ERROR_NONE) {
 144                         LOGE("Fail to GetLabel[%d]", ret);
 145                         return ret;
 146                 }
 147
 148                 layer.decodingType |= POSTPROCESS_DECODING_TYPE_LABEL;
 149         }
 150
 151         if (json_object_has_member(object, "number")) {
 152                 int ret = number.Parse(object);
 153                 if (ret != MEDIA_VISION_ERROR_NONE) {
 154                         LOGE("Fail to GetNumber[%d]", ret);
 155                         return ret;
 156                 }
 157
 158                 layer.decodingType |= POSTPROCESS_DECODING_TYPE_NUMBER;
 159         }
 160
 161         if (json_object_has_member(object, "offset")) {
 162                 int ret = ParseOffset(object, layer);
 163                 if (ret != MEDIA_VISION_ERROR_NONE) {
 164                         LOGE("Fail to GetOffsetVector[%d]", ret);
 165                         return ret;
 166                 }
 167         }
 168
 169         if (json_object_has_member(object, "landmark")) {
 170                 int ret = ParseLandmark(object, layer);
 171                 if (ret != MEDIA_VISION_ERROR_NONE) {
 172                         LOGE("Fail to GetLandmark[%d]", ret);
 173                         return ret;
 174                 }
 175
 176                 if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
 177                         landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
 178                         ret = landmark.ParseDecodeInfo(object, mSupportedShapeType);
 179                         if (ret != MEDIA_VISION_ERROR_NONE) {
 180                                 LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
 181                                 return ret;
 182                         }
 183                 }
 184         }
 185
 186         if (json_object_has_member(object, "displacement")) {
 187                 int ret = landmark.ParseDisplacement(object, mSupportedShapeType);
 188                 if (ret != MEDIA_VISION_ERROR_NONE) {
 189                         LOGE("Fail to GetDispVector[%d]", ret);
 190                         return ret;
 191                 }
 192
 193                 layer.decodingType |= POSTPROCESS_DECODING_TYPE_DISPLACEMENT;
 194
 195                 // edgemap node is needed by posenet model(multi pose model) which has "displayment" node and
 196                 // decoding type of the multi pose model is INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE.
 197                 if (json_object_has_member(object, "edgemap")) {
 198                         ret = landmark.ParseEdgeMap(object);
 199                         if (ret != MEDIA_VISION_ERROR_NONE) {
 200                                 LOGE("Fail to GetEdgeConnection[%d]", ret);
 201                                 return ret;
 202                         }
 203                 }
 204         }
 205
 206         parsed = true;
 207
 208         LOGI("LEAVE");
 209
 210         return MEDIA_VISION_ERROR_NONE;
 211 }
 212
 213 int OutputMetadata::Parse(JsonObject *root, std::string key_name)
 214 {
 215         LOGI("ENTER");
 216
 217         JsonArray *outputList = json_object_get_array_member(root, key_name.c_str());
 218
 219         LOGI("output tensor count = %d", json_array_get_length(outputList));
 220
 221         for (auto idx = 0; idx < json_array_get_length(outputList); ++idx) {
 222                 JsonNode *output_node = json_array_get_element(outputList, idx);
 223                 std::string token(json_to_string(output_node, 1));
 224                 int pos = token.find(":");
 225                 std::string tensor_name = token.substr(0, pos);
 226                 const std::vector<char> delimiters = { '{', ' ', ':', '\n', '\"' };
 227
 228                 for (auto &delimiter : delimiters)
 229                         tensor_name.erase(std::remove(tensor_name.begin(), tensor_name.end(), delimiter), tensor_name.end());
 230
 231                 if (tensor_name.compare((std::string("tensor") + std::to_string(idx + 1))) != 0) {
 232                         LOGE("Invalid tensor element. A tensor element form should be `tensorN`.");
 233                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
 234                 }
 235
 236                 LOGI("Parse tensor name : %s", tensor_name.c_str());
 237
 238                 JsonObject *output_object = json_node_get_object(output_node);
 239
 240                 if (!json_object_has_member(output_object, tensor_name.c_str())) {
 241                         LOGE("No tensor member.");
 242                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
 243                 }
 244
 245                 JsonNode *tensor_node = json_object_get_member(output_object, tensor_name.c_str());
 246                 JsonObject *tensor_object = json_node_get_object(tensor_node);
 247                 std::string name_value = json_object_get_string_member(tensor_object, "name");
 248
 249                 LOGI("name = %s", name_value.c_str());
 250
 251                 // TODO. add tensor information here.
 252
 253                 LayerInfo layer = {
 254                         name_value,
 255                 };
 256
 257                 if (json_object_has_member(tensor_object, "postprocess")) {
 258                         int ret = GetPostProcess(tensor_object, layer);
 259                         if (ret != MEDIA_VISION_ERROR_NONE) {
 260                                 LOGE("Fail to GetPostProcess.");
 261                                 return ret;
 262                         }
 263                 }
 264
 265                 _tensor_info.insert(std::make_pair(name_value, layer));
 266         }
 267
 268         parsed = true;
 269
 270         LOGI("LEAVE");
 271
 272         return MEDIA_VISION_ERROR_NONE;
 273 }
 274 } /* Inference */
 275 } /* MediaVision */