From 10965865f20620704308c026f3139abbb8f96742 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Thu, 30 Sep 2021 19:45:34 +0900 Subject: [PATCH 01/16] test/machine_learning/inference: add test cases for legacy path Added test cases for the legacy path of the inference engine, which uses user-given model information instead of information from a json file. For this, the patch also contains a bit of refactoring that replaces google test fixtures with parameterized tests so that the API path - json or legacy - can be chosen at runtime. This patch enlarges the existing test coverage from 119 to 132 test cases. [==========] 132 tests from 6 test suites ran. (49021 ms total) [ PASSED ] 132 tests. Change-Id: I9829725aad8037cbe5a82d50e7790a3e7a6bfe6b Signed-off-by: Inki Dae --- .../inference/test_face_detection.cpp | 40 +++- .../inference/test_face_landmark_detection.cpp | 11 +- .../inference/test_image_classification.cpp | 204 +++++++++++++++++++-- .../inference/test_inference_helper.cpp | 25 ++- .../inference/test_inference_helper.hpp | 18 +- .../inference/test_object_detection.cpp | 41 ++++- .../inference/test_pose_landmark_detection.cpp | 41 ++++- 7 files changed, 341 insertions(+), 39 deletions(-) diff --git a/test/testsuites/machine_learning/inference/test_face_detection.cpp b/test/testsuites/machine_learning/inference/test_face_detection.cpp index ebf37e5..376a717 100644 --- a/test/testsuites/machine_learning/inference/test_face_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_face_detection.cpp @@ -27,6 +27,7 @@ public: { ASSERT_EQ(mv_inference_configure(infer, engine_cfg), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_inference_prepare(infer), MEDIA_VISION_ERROR_NONE); ASSERT_EQ(MediaVision::Common::ImageHelper::loadImageToSource( IMG_FACE, mv_source), @@ -37,9 +38,42 @@ public: } }; -TEST_F(TestFaceDetection, CPU_TFLITE_MobilenetV1_SSD) +TEST_P(TestFaceDetection, CPU_TFLITE_MobilenetV1_SSD) { engine_config_hosted_cpu_tflite(engine_cfg, - FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); + FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH, _use_json_parser); + if (!_use_json_parser) { + const char *inputNodeName = "normalized_input_image_tensor"; + const char *outputNodeName[] = { "TFLite_Detection_PostProcess", + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3), + MEDIA_VISION_ERROR_NONE); + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 4), MEDIA_VISION_ERROR_NONE); + } + inferenceFace(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestFaceDetection, + ::testing::Values( + ParamTypeOne(false), +
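/* Each case below runs the whole suite once: false exercises the legacy path, where tensor and node attributes are set explicitly in the test body, and true loads the same information from the model's .json meta file. */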
ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp b/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp index 6d4ada9..e186c6f 100644 --- a/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp @@ -37,7 +37,7 @@ public: } }; -TEST_F(TestFaceLandmarkDetection, CPU_OPENCV_CAFFE_CNNCASCADE) +TEST_P(TestFaceLandmarkDetection, CPU_OPENCV_CAFFE_CNNCASCADE) { const char *inputNodeName = "data"; const char *outputNodeName[] = { "Sigmoid_fc2" }; @@ -85,4 +85,11 @@ TEST_F(TestFaceLandmarkDetection, CPU_OPENCV_CAFFE_CNNCASCADE) outputNodeName, 1), MEDIA_VISION_ERROR_NONE); inferenceFaceLandmark(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestFaceLandmarkDetection, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_image_classification.cpp b/test/testsuites/machine_learning/inference/test_image_classification.cpp index 0aab594..cdda3f6 100644 --- a/test/testsuites/machine_learning/inference/test_image_classification.cpp +++ b/test/testsuites/machine_learning/inference/test_image_classification.cpp @@ -63,58 +63,232 @@ public: } }; -TEST_F(TestImageClassification, CPU_TFLITE_MobilenetV1) +TEST_P(TestImageClassification, CPU_TFLITE_MobilenetV1) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_MOBILENET_V1_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV1/Predictions/Reshape_1" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_MobilenetV2) +TEST_P(TestImageClassification, CPU_TFLITE_MobilenetV2) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_MOBILENET_V2_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV2/Predictions/Reshape_1" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, 
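/* with mean and std both set to 127.5, preprocessing computes (pixel - mean) / std, mapping UINT8 values from [0, 255] to [-1, 1] */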
MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.01), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_Densenet) +TEST_P(TestImageClassification, CPU_TFLITE_Densenet) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_DENSENET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "Placeholder"; + const char *outputNodeName[] = { "softmax_tensor" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 255.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_Nasnet) +TEST_P(TestImageClassification, CPU_TFLITE_Nasnet) { engine_config_hosted_cpu_tflite_user_model(engine_cfg, IC_TFLITE_WEIGHT_NASNET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "final_layer/predictions" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + 
MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_MNasnet) +TEST_P(TestImageClassification, CPU_TFLITE_MNasnet) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_MNASNET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "output" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 57.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_Squeezenet) +TEST_P(TestImageClassification, CPU_TFLITE_Squeezenet) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_SQUEEZENET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "Placeholder"; + const char *outputNodeName[] = { "softmax_tensor" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_QUANT_MobilenetV1) +TEST_P(TestImageClassification, CPU_TFLITE_QUANT_MobilenetV1) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, 
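/* quantized variant: the attributes below keep pixels as UINT8 (mean 0.0, std 1.0 and an explicit UINT8 input data type), so no normalization is applied */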
IC_TFLITE_WEIGHT_QUANT_MOBILENET_V1_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV1/Predictions/Reshape_1" }; + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_UINT8), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 1.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestImageClassification, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index 1c88689..81a0380 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -16,19 +16,23 @@ TestInference::~TestInference() } void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, - const char *tf_weight) + const char *tf_weight, + const bool use_json_parser) { EXPECT_EQ(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight), MEDIA_VISION_ERROR_NONE); - std::string meta_file_path = tf_weight; - meta_file_path = meta_file_path.substr(0, meta_file_path.find('.')); - meta_file_path += std::string(".json"); + if (use_json_parser) { + std::string meta_file_path = tf_weight; + meta_file_path = meta_file_path.substr(0, meta_file_path.find('.')); + meta_file_path += std::string(".json"); + + EXPECT_EQ(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_META_FILE_PATH , meta_file_path.c_str()), + MEDIA_VISION_ERROR_NONE); + } - EXPECT_EQ(mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_META_FILE_PATH , meta_file_path.c_str()), - MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE), @@ -41,10 +45,11 @@ void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, const char *tf_weight, - const char *user_file) + const char *user_file, + const bool use_json_parser) { - engine_config_hosted_cpu_tflite(handle, tf_weight); + engine_config_hosted_cpu_tflite(handle, tf_weight, use_json_parser); EXPECT_EQ(mv_engine_config_set_string_attribute( handle, 
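/* user_file is the label file that maps model output indices to human-readable class names */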
MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), MEDIA_VISION_ERROR_NONE); -} +} \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.hpp b/test/testsuites/machine_learning/inference/test_inference_helper.hpp index 16bb4c6..a04fb00 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.hpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.hpp @@ -3,8 +3,18 @@ #include -class TestInference : public ::testing::Test +typedef std::tuple<bool> ParamTypeOne; + +class TestInference : public ::testing::TestWithParam<ParamTypeOne> { +protected: + void SetUp() final + { + std::tie(_use_json_parser) = GetParam(); + } + + bool _use_json_parser; + public: TestInference(); virtual ~TestInference(); @@ -14,10 +24,12 @@ public: }; void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, - const char *tf_weight); + const char *tf_weight, + const bool use_json_parser); void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, const char *tf_weight, - const char *user_file); + const char *user_file, + const bool use_json_parser); #endif //__TEST_INFERENCE_HELPER_HPP__ diff --git a/test/testsuites/machine_learning/inference/test_object_detection.cpp b/test/testsuites/machine_learning/inference/test_object_detection.cpp index 8cea9f7..b849c20 100644 --- a/test/testsuites/machine_learning/inference/test_object_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_object_detection.cpp @@ -46,10 +46,45 @@ public: } }; -TEST_F(TestObjectDetection, CPU_TFLITE_MobilenetV1_SSD) +TEST_P(TestObjectDetection, CPU_TFLITE_MobilenetV1_SSD) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH, - OD_LABEL_MOBILENET_V1_SSD_300_PATH); + OD_LABEL_MOBILENET_V1_SSD_300_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "normalized_input_image_tensor"; + const char *outputNodeName[] = { "TFLite_Detection_PostProcess", + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3), + MEDIA_VISION_ERROR_NONE); + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 4), MEDIA_VISION_ERROR_NONE); + } + inferenceDog(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetection, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp 
index 033488c..58c4b43 100644 --- a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp @@ -37,9 +37,44 @@ public: } }; -TEST_F(TestPoseLandmarkDetection, CPU_TFLITE_MobilenetV1) +TEST_P(TestPoseLandmarkDetection, CPU_TFLITE_MobilenetV1) { engine_config_hosted_cpu_tflite( - engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "sub_2"; + const char *outputNodeName[] = { "MobilenetV1/heatmap_2/BiasAdd", + "MobilenetV1/offset_2/BiasAdd", + "MobilenetV1/displacement_fwd_2/BiasAdd", + "MobilenetV1/displacement_bwd_2/BiasAdd" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3), + MEDIA_VISION_ERROR_NONE); + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 4), MEDIA_VISION_ERROR_NONE); + } + + inferencePoseLandmark(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestPoseLandmarkDetection, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file -- 2.7.4 From 78b72ee27b5b6ae5355046e82fcc3d604ab0a55f Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Thu, 7 Oct 2021 15:30:36 +0900 Subject: [PATCH 02/16] mv_machine_learning: code cleanup to GetFacialLandMarkDetectionResults function Cleaned up the GetFacialLandMarkDetectionResults function by: - sliding code - renaming variables to meaningful names for readability. 
Change-Id: Ib59786c085c8202a1f7d9eb85a01d528220c728f Signed-off-by: Inki Dae --- .../mv_inference/inference/include/Inference.h | 3 +- .../mv_inference/inference/src/Inference.cpp | 46 ++++++++++++---------- .../inference/src/mv_inference_open.cpp | 25 +++--------- 3 files changed, 31 insertions(+), 43 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index cad69e9..a0506f2 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -317,8 +317,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results, - int width, int height); + int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results); /** * @brief Gets the PoseLandmarkDetectionResults diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 756f041..c25a4a4 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1536,15 +1536,15 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int Inference::GetFacialLandMarkDetectionResults( - FacialLandMarkDetectionResults *detectionResults, int width, int height) + int Inference::GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results) { LOGI("ENTER"); - FacialLandMarkDetectionResults results; + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); if (outputMeta.IsParsed()) { auto& landmarkInfo = outputMeta.GetLandmark(); auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || !mOutputTensorBuffers.exist(scoreInfo.GetName())) { LOGE("output buffers named of %s or %s are NULL", @@ -1555,27 +1555,26 @@ namespace inference int heatMapWidth = 0; int heatMapHeight = 0; int heatMapChannel = 0; - if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; - heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; - heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; - } - - int number_of_landmarks = 0; std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + int number_of_landmarks = heatMapChannel; + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset(); } else { - number_of_landmarks = heatMapChannel; + heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; + heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; + heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; } + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); // decoding PoseDecoder 
poseDecoder(mOutputTensorBuffers, outputMeta, heatMapWidth, heatMapHeight, heatMapChannel, number_of_landmarks); + + // initialize decoder queue with landmarks to be decoded. int ret = poseDecoder.init(); if (ret != MEDIA_VISION_ERROR_NONE) { @@ -1585,21 +1584,24 @@ namespace inference float inputW = 1.f; float inputH = 1.f; + if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); } + float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + poseDecoder.decode(inputW, inputH, thresRadius); for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { - results.locations.push_back( + results->locations.push_back( cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width), poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height))); } - results.number_of_landmarks = results.locations.size(); - *detectionResults = results; + + results->number_of_landmarks = results->locations.size(); } else { tensor_t outputData; @@ -1611,22 +1613,24 @@ namespace inference } int number_of_detections = outputData.dimInfo[0][1] >> 1; - float *loc = reinterpret_cast<float *>(outputData.data[0]); - results.number_of_landmarks = number_of_detections; - results.locations.resize(number_of_detections); + + results->number_of_landmarks = number_of_detections; + results->locations.resize(number_of_detections); LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (auto& point : results.locations) { + + float *loc = reinterpret_cast<float *>(outputData.data[0]); + + for (auto& point : results->locations) { point.x = static_cast<int>(*loc++ * mSourceSize.width); point.y = static_cast<int>(*loc++ * mSourceSize.height); LOGI("x:%d, y:%d", point.x, point.y); } - - *detectionResults = results; } + LOGI("Inference: FacialLandmarkDetectionResults: %d\n", - results.number_of_landmarks); + results->number_of_landmarks); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index 41c62df..c945ac4 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -765,20 +765,6 @@ int mv_inference_facial_landmark_detect_open( mv_inference_facial_landmark_detected_cb detected_cb, void *user_data) { Inference *pInfer = static_cast<Inference *>(infer); - unsigned int width, height; - - int ret = mv_source_get_width(source, &width); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get width"); - return ret; - } - - ret = mv_source_get_height(source, &height); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get height"); - return ret; - } - std::vector<mv_source_h> sources; std::vector<mv_rectangle_s> rects; @@ -787,7 +773,7 @@ int mv_inference_facial_landmark_detect_open( if (roi != NULL) rects.push_back(*roi); - ret = pInfer->Run(sources, rects); + int ret = pInfer->Run(sources, rects); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to run inference"); return ret; @@ -795,8 +781,7 @@ int mv_inference_facial_landmark_detect_open( FacialLandMarkDetectionResults facialLandMarkDetectionResults; - ret = pInfer->GetFacialLandMarkDetectionResults( - &facialLandMarkDetectionResults, width, height); + ret = 
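/* the width and height arguments are gone: landmark coordinates are scaled by mSourceSize inside GetFacialLandMarkDetectionResults itself */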
pInfer->GetFacialLandMarkDetectionResults(&facialLandMarkDetectionResults); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to get inference results"); return ret; } @@ -805,9 +790,9 @@ int mv_inference_facial_landmark_detect_open( int numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks; std::vector<mv_point_s> locations(numberOfLandmarks); - for (int n = 0; n < numberOfLandmarks; ++n) { - locations[n].x = facialLandMarkDetectionResults.locations[n].x; - locations[n].y = facialLandMarkDetectionResults.locations[n].y; + for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) { + locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x; + locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y; } detected_cb(source, numberOfLandmarks, locations.data(), user_data); -- 2.7.4 From c750e58c3fc375e2f9070ed03bc24617bc5d1f3e Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Thu, 7 Oct 2021 19:03:23 +0900 Subject: [PATCH 03/16] mv_machine_learning: code refactoring to input and output metadata classes Refactored the InputMetadata and OutputMetadata classes. The biggest change of this patch is to delegate all requests for the various metadata-relevant classes to the InputMetadata and OutputMetadata classes, which mitigates the Inference class's dependency on Metadata internals. In addition, it moves several classes declared inside the BoxInfo class out of it, and changes class to struct where class functionality isn't needed. It also includes some cleanup and code sliding. This is just one step toward the next code refactoring. Change-Id: I92c4e2b2c77499246c56a237282c05262550301a Signed-off-by: Inki Dae --- .../mv_inference/inference/include/InputMetadata.h | 10 +- .../mv_inference/inference/include/ObjectDecoder.h | 9 +- .../inference/include/OutputMetadata.h | 323 ++++++++++----------- .../mv_inference/inference/src/Inference.cpp | 203 +++++++------ .../mv_inference/inference/src/ObjectDecoder.cpp | 70 ++--- .../mv_inference/inference/src/OutputMetadata.cpp | 139 +++------ .../mv_inference/inference/src/PoseDecoder.cpp | 97 +++---- packaging/capi-media-vision.spec | 2 +- 8 files changed, 392 insertions(+), 461 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h index 8b5e2bd..bdd12c2 100644 --- a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h @@ -88,10 +88,6 @@ namespace inference class InputMetadata { public: - bool parsed; - std::map<std::string, LayerInfo> layer; - std::map<std::string, Options> option; - /** * @brief Creates an InputMetadata class instance. 
* @@ -113,11 +109,17 @@ namespace inference * @since_tizen 6.5 */ int Parse(JsonObject *root); + bool IsParsed(void) { return parsed; } + std::map& GetLayer() { return layer; } + std::map& GetOption() { return option; } private: + bool parsed; std::map mSupportedShapeType; std::map mSupportedDataType; std::map mSupportedColorSpace; + std::map layer; + std::map option; template static T GetSupportedType(JsonObject* root, std::string typeName, diff --git a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h index f5324f2..11c5cc2 100755 --- a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h +++ b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h @@ -44,13 +44,8 @@ namespace inference OutputMetadata mMeta; int mBoxOffset; int mNumberOfOjects; - - ScoreInfo& mScoreInfo; - BoxInfo& mBoxInfo; - float mScaleW; float mScaleH; - Boxes mResultBoxes; float decodeScore(int idx); @@ -62,9 +57,7 @@ namespace inference int boxOffset, float scaleW, float scaleH, int numberOfObjects = 0) : mTensorBuffer(buffer), mMeta(metaData), mBoxOffset(boxOffset), mNumberOfOjects(numberOfObjects), - mScoreInfo(mMeta.GetScore()), mBoxInfo(mMeta.GetBox()), - mScaleW(scaleW), mScaleH(scaleH), - mResultBoxes() { + mScaleW(scaleW), mScaleH(scaleH), mResultBoxes() { }; ~ObjectDecoder() = default; diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index ca4df80..d223726 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -42,6 +42,7 @@ namespace inference { private: std::vector dims; + public: std::vector GetValidIndexAll() const; void SetValidIndex(int index); @@ -52,6 +53,7 @@ namespace inference private: double scale; double zeropoint; + public: DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; ~DeQuantization() = default; @@ -66,10 +68,9 @@ namespace inference std::string name; DimInfo dimInfo; double threshold; - inference_score_type_e type; int topNumber; + inference_score_type_e type; std::shared_ptr deQuantization; - std::map supportedScoreTypes; public: @@ -82,141 +83,130 @@ namespace inference inference_score_type_e GetType() { return type; } int GetTopNumber() { return topNumber; } std::shared_ptr GetDeQuant() { return deQuantization; } - int ParseScore(JsonObject *root); }; - class BoxInfo - { + struct AnchorParam { + int mode; /**< 0: generate anchor, 1:load pre-anchor*/ + int numLayers; + float minScale; + float maxScale; + int inputSizeHeight; + int inputSizeWidth; + float anchorOffsetX; + float anchorOffsetY; + std::vector strides; + std::vector aspectRatios; + bool isReduceBoxedInLowestLayer; + float interpolatedScaleAspectRatio; + bool isFixedAnchorSize; + bool isExponentialBoxScale; + float xScale; + float yScale; + float wScale; + float hScale; + }; + + struct NMSParam { + inference_box_nms_type_e mode; /**< 0: standard */ + float iouThreshold; + std::map supportedBoxNmsTypes; + }; + + struct RotateParam { + int startPointIndex; + int endPointIndex; + cv::Point2f startPoint; + cv::Point2f endPoint; + float baseAngle; + }; + + struct RoiOptionParam { + int startPointIndex; + int endPointIndex; + int centerPointIndex; + cv::Point2f centerPoint; + float shiftX; + float shiftY; + float scaleX; + float scaleY; + int mode; + }; + + class DecodeInfo { + private: + AnchorParam 
anchorParam; + std::vector anchorBoxes; + NMSParam nmsParam; + RotateParam rotParam; + RoiOptionParam roiOptParam; + public: - class DecodeInfo { - public: - class AnchorParam { - public: - int mode; /**< 0: generate anchor, 1:load pre-anchor*/ - int numLayers; - float minScale; - float maxScale; - int inputSizeHeight; - int inputSizeWidth; - float anchorOffsetX; - float anchorOffsetY; - std::vector strides; - std::vector aspectRatios; - bool isReduceBoxedInLowestLayer; - float interpolatedScaleAspectRatio; - bool isFixedAnchorSize; - bool isExponentialBoxScale; - float xScale; - float yScale; - float wScale; - float hScale; - - AnchorParam() = default; - ~AnchorParam() = default; - }; - - class NMSParam { - public: - inference_box_nms_type_e mode; /**< 0: standard */ - float iouThreshold; - - std::map supportedBoxNmsTypes; - - NMSParam() : mode(INFERENCE_BOX_NMS_TYPE_NONE), iouThreshold(0.2f) { - supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); - }; - - ~NMSParam() = default; - }; - - class RotateParam { - public: - int startPointIndex; - int endPointIndex; - cv::Point2f startPoint; - cv::Point2f endPoint; - float baseAngle; - - RotateParam() : startPointIndex(-1), - endPointIndex(-1), - startPoint(cv::Point2f(0.f,0.f)), - endPoint(cv::Point2f(0.f,0.f)), - baseAngle(0.f){}; - ~RotateParam() = default; - }; - - class RoiOptionParam { - public: - int startPointIndex; - int endPointIndex; - int centerPointIndex; - cv::Point2f centerPoint; - float shiftX; - float shiftY; - float scaleX; - float scaleY; - int mode; - - RoiOptionParam() : startPointIndex(-1), - endPointIndex(-1), - centerPointIndex(-1), - centerPoint(cv::Point2f(0.f, 0.f)), - shiftX(0.f), shiftY(0.f), - scaleX(1.f), scaleY(1.f), - mode(-1) {}; - ~RoiOptionParam() = default; - }; - - private: - AnchorParam anchorParam; - std::vector anchorBoxes; - NMSParam nmsParam; - RotateParam rotParam; - RoiOptionParam roiOptParam; - - public: - DecodeInfo() = default; - ~DecodeInfo() = default; - std::vector& GetAnchorBoxAll(); - bool IsAnchorBoxEmpty(); - void AddAnchorBox(cv::Rect2f& ahcnor); - void ClearAnchorBox(); - - // Anchor param - int ParseAnchorParam(JsonObject *root); - int GenerateAnchor(); - bool IsFixedAnchorSize(); - bool IsExponentialBoxScale(); - float GetAnchorXscale(); - float GetAnchorYscale(); - float GetAnchorWscale(); - float GetAnchorHscale(); - float CalculateScale(float min, float max, int index, int maxStride); - - // Nms param - int ParseNms(JsonObject *root); - int GetNmsMode(); - float GetNmsIouThreshold(); - - // Rotate param - int ParseRotate(JsonObject *root); - int GetRotStartPointIndex(); - int GetRotEndPointIndex(); - float GetBaseAngle(); - - // Roi option param - int ParseRoiOption(JsonObject *root); - int GetRoiMode(); - int GetRoiCenterPointIndex(); - int GetRoiStartPointIndex(); - int GetRoiEndPointIndex(); - float GetShiftX(); - float GetShiftY(); - float GetScaleX(); - float GetScaleY(); - }; + DecodeInfo() { + nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; + nmsParam.iouThreshold = 0.2f; + nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); + + rotParam.startPointIndex = -1; + rotParam.endPointIndex = -1; + rotParam.startPoint = cv::Point2f(0.f,0.f); + rotParam.endPoint = cv::Point2f(0.f,0.f); + rotParam.baseAngle = 0.f; + + roiOptParam.startPointIndex = -1; + roiOptParam.endPointIndex = -1; + roiOptParam.centerPointIndex = -1; + roiOptParam.centerPoint = cv::Point2f(0.f, 0.f); + roiOptParam.shiftX = 0.f; + roiOptParam.shiftY = 0.f; + 
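/* defaults mirror the old nested-class constructors: identity scale and mode -1, meaning the ROI option is unset */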
roiOptParam.scaleX = 1.f; + roiOptParam.scaleY = 1.f; + roiOptParam.mode = -1; + } + + ~DecodeInfo() = default; + + std::vector& GetAnchorBoxAll(); + bool IsAnchorBoxEmpty(); + void AddAnchorBox(cv::Rect2f& ahcnor); + void ClearAnchorBox(); + + // Anchor param + int ParseAnchorParam(JsonObject *root); + int GenerateAnchor(); + bool IsFixedAnchorSize(); + bool IsExponentialBoxScale(); + float GetAnchorXscale(); + float GetAnchorYscale(); + float GetAnchorWscale(); + float GetAnchorHscale(); + float CalculateScale(float min, float max, int index, int maxStride); + + // Nms param + int ParseNms(JsonObject *root); + int GetNmsMode(); + float GetNmsIouThreshold(); + + // Rotate param + int ParseRotate(JsonObject *root); + int GetRotStartPointIndex(); + int GetRotEndPointIndex(); + float GetBaseAngle(); + + // Roi option param + int ParseRoiOption(JsonObject *root); + int GetRoiMode(); + int GetRoiCenterPointIndex(); + int GetRoiStartPointIndex(); + int GetRoiEndPointIndex(); + float GetShiftX(); + float GetShiftY(); + float GetScaleX(); + float GetScaleY(); + }; + class BoxInfo + { private: std::string name; DimInfo dimInfo; @@ -275,25 +265,16 @@ namespace inference int ParseNumber(JsonObject *root); }; + struct HeatMapInfo { + int wIdx; + int hIdx; + int cIdx; + float nmsRadius; + inference_tensor_shape_type_e shapeType; + }; + class Landmark { - public: - class DecodeInfo { - public: - class HeatMapInfo { - public: - int wIdx; - int hIdx; - int cIdx; - inference_tensor_shape_type_e shapeType; - float nmsRadius; - HeatMapInfo() = default; - ~HeatMapInfo() = default; - }; - HeatMapInfo heatMap; - DecodeInfo() = default; - ~DecodeInfo() = default; - }; private: std::string name; DimInfo dimInfo; @@ -303,7 +284,7 @@ namespace inference inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, 1: decoding heatmap, 2: decoding heatmap with refinement */ - DecodeInfo decodingInfo; + HeatMapInfo heatMapInfo; std::map supportedLandmarkTypes; std::map supportedLandmarkCoordinateTypes; @@ -318,7 +299,7 @@ namespace inference int GetOffset(); inference_landmark_coorindate_type_e GetCoordinate(); inference_landmark_decoding_type_e GetDecodingType(); - DecodeInfo& GetDecodingInfo(); + HeatMapInfo& GetHeatMapInfo(); int ParseLandmark(JsonObject *root); }; @@ -366,7 +347,7 @@ namespace inference Edge() = default; ~Edge() = default; int ParseEdge(JsonObject *root); - std::vector>& GetEdgesAll(); + std::vector>& GetEdgesAll() { return edges; } }; class OutputMetadata @@ -416,20 +397,38 @@ namespace inference */ int Parse(JsonObject *root); - bool IsParsed(); - ScoreInfo& GetScore(); - BoxInfo& GetBox(); - Label& GetLabel(); - Number& GetNumber(); - Landmark& GetLandmark(); - OffsetVec& GetOffset(); - std::vector& GetDispVecAll(); - Edge& GetEdge(); + bool IsParsed() { return parsed; } + + std::string GetScoreName() { return score.GetName(); } + DimInfo GetScoreDimInfo() { return score.GetDimInfo(); } + inference_score_type_e GetScoreType() { return score.GetType(); } + double GetScoreThreshold() { return score.GetThresHold(); } + int GetScoreTopNumber() { return score.GetTopNumber(); } + std::shared_ptr GetScoreDeQuant() { return score.GetDeQuant(); } + std::string GetBoxName() { return box.GetName(); } + DimInfo GetBoxDimInfo() { return box.GetDimInfo(); } + std::vector GetBoxOrder() { return box.GetOrder(); } + DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } + inference_box_type_e GetBoxType() { return box.GetType(); } + int GetScoreCoordinate() { return 
box.GetCoordinate(); } + std::string GetLabelName() { return label.GetName(); } + std::string GetNumberName() { return number.GetName(); } + DimInfo GetNumberDimInfo() { return number.GetDimInfo(); } + std::string GetLandmarkName() { return landmark.GetName(); } + int GetLandmarkOffset() { return landmark.GetOffset(); } + inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } + DimInfo GetLandmarkDimInfo() { return landmark.GetDimInfo(); } + HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); } + inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); } + inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); } + std::string GetOffsetVecName() { return offsetVec.GetName(); } + inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } + std::vector& GetDispVecAll() { return dispVecs; } + std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } template static T GetSupportedType(JsonObject* root, std::string typeName, std::map& supportedTypes); }; - } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index c25a4a4..05c9c07 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -371,10 +371,9 @@ namespace inference mConfig.mDataType = static_cast(dataType); mConfig.mInputLayerNames = names; - const InputMetadata& inputMeta = mMetadata.GetInputMeta(); - if (inputMeta.parsed) { + if (mMetadata.GetInputMeta().IsParsed()) { LOGI("use input meta"); - auto& layerInfo = inputMeta.layer.begin()->second; + auto& layerInfo = mMetadata.GetInputMeta().GetLayer().begin()->second; if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW mConfig.mTensorInfo.ch = layerInfo.dims[1]; mConfig.mTensorInfo.dim = layerInfo.dims[0]; @@ -389,8 +388,8 @@ namespace inference LOGE("Invalid shape type[%d]", layerInfo.shapeType); } - if (!inputMeta.option.empty()) { - auto& option = inputMeta.option.begin()->second; + if (!mMetadata.GetInputMeta().GetOption().empty()) { + auto& option = mMetadata.GetInputMeta().GetOption().begin()->second; if (option.normalization.use) { mConfig.mMeanValue = option.normalization.mean[0]; mConfig.mStdValue = option.normalization.std[0]; @@ -399,7 +398,7 @@ namespace inference mConfig.mDataType = layerInfo.dataType; mConfig.mInputLayerNames.clear(); - for (auto& layer : inputMeta.layer) { + for (auto& layer : mMetadata.GetInputMeta().GetLayer()) { mConfig.mInputLayerNames.push_back(layer.first); } } @@ -444,25 +443,25 @@ namespace inference mConfig.mOutputLayerNames = names; OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { + if (mMetadata.GetOutputMeta().IsParsed()) { mConfig.mOutputLayerNames.clear(); - if (!outputMeta.GetScore().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetScore().GetName()); + if (!outputMeta.GetScoreName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetScoreName()); - if (!outputMeta.GetBox().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetBox().GetName()); + if (!outputMeta.GetBoxName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName()); - if (!outputMeta.GetLabel().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLabel().GetName()); + if 
(!outputMeta.GetLabelName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLabelName()); - if (!outputMeta.GetNumber().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName()); + if (!outputMeta.GetNumberName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetNumberName()); - if (!outputMeta.GetLandmark().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName()); + if (!outputMeta.GetLandmarkName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName()); - if (!outputMeta.GetOffset().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName()); + if (!outputMeta.GetOffsetVecName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName()); for (auto& dispVec : outputMeta.GetDispVecAll()) { mConfig.mOutputLayerNames.push_back(dispVec.GetName()); @@ -1044,12 +1043,11 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - const InputMetadata& inputMeta = mMetadata.GetInputMeta(); - if (inputMeta.parsed) { + if (mMetadata.GetInputMeta().IsParsed()) { for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) { inference_engine_tensor_buffer& tensor_buffer = buffer.second; - const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); - const Options& opt = inputMeta.option.empty() ? Options() : inputMeta.option.at(buffer.first); + const LayerInfo& layerInfo = mMetadata.GetInputMeta().GetLayer().at(buffer.first); + const Options& opt = mMetadata.GetInputMeta().GetOption().empty() ? Options() : mMetadata.GetInputMeta().GetOption().at(buffer.first); int data_type = ConvertToCv(tensor_buffer.data_type); @@ -1095,51 +1093,50 @@ namespace inference int Inference::GetClassficationResults(ImageClassificationResults &results) { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); // Will contain top N results in ascending order. std::vector> topScore; auto threadHold = mConfig.mConfidenceThresHold; results.number_of_classes = 0; - if (outputMeta.IsParsed()) { - auto& info = outputMeta.GetScore(); - std::vector indexes = info.GetDimInfo().GetValidIndexAll(); + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata outputMetadata = mMetadata.GetOutputMeta(); + std::vector indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll(); if (indexes.size() != 1) { LOGE("Invalid dim size. 
It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - int classes = mOutputLayerProperty.layers[info.GetName()].shape[indexes[0]]; + int classes = mOutputLayerProperty.layers[outputMetadata.GetScoreName()].shape[indexes[0]]; - if (!mOutputTensorBuffers.exist(info.GetName())) { + if (!mOutputTensorBuffers.exist(outputMetadata.GetScoreName())) { LOGE("output buffe is NULL"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } PostProcess postProc; - postProc.ScoreClear(info.GetTopNumber()); - threadHold = info.GetThresHold(); + postProc.ScoreClear(outputMetadata.GetScoreTopNumber()); + threadHold = outputMetadata.GetScoreThreshold(); for (int cId = 0; cId < classes; ++cId) { float value = 0.0f; try { - value = mOutputTensorBuffers.getValue(info.GetName(), cId); + value = mOutputTensorBuffers.getValue(outputMetadata.GetScoreName(), cId); } catch (const std::exception& e) { LOGE(" Fail to get getValue with %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (info.GetDeQuant()) { + if (outputMetadata.GetScoreDeQuant()) { value = PostProcess::dequant(value, - info.GetDeQuant()->GetScale(), - info.GetDeQuant()->GetZeroPoint()); + outputMetadata.GetScoreDeQuant()->GetScale(), + outputMetadata.GetScoreDeQuant()->GetZeroPoint()); } - if (info.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) + if (outputMetadata.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) value = PostProcess::sigmoid(value); if (value < threadHold) @@ -1212,46 +1209,45 @@ namespace inference int Inference::GetObjectDetectionResults( ObjectDetectionResults *detectionResults) { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + // decoding type - auto& boxInfo = outputMeta.GetBox(); - auto& scoreInfo = outputMeta.GetScore(); - if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName()) ){ + if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName()) ){ LOGE("output buffers named of %s or %s are NULL", - boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } int boxOffset = 0; int numberOfObjects = 0; - if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; } else { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. 
It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + std::vector scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll(); if (scoreIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - numberOfObjects = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; + numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]]; } ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()), numberOfObjects); objDecoder.init(); @@ -1378,46 +1374,45 @@ namespace inference int Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults) { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + // decoding type - auto& boxInfo = outputMeta.GetBox(); - auto& scoreInfo = outputMeta.GetScore(); - if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())){ + if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName())){ LOGE("output buffers named of %s or %s are NULL", - boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } int boxOffset = 0; int numberOfFaces = 0; - if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; } else { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + std::vector scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll(); if (scoreIndexes.size() != 1) { LOGE("Invaid dim size. 
It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - numberOfFaces = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; + numberOfFaces = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]]; } ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()), numberOfFaces); objDecoder.init(); @@ -1540,32 +1535,30 @@ namespace inference { LOGI("ENTER"); - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { - auto& landmarkInfo = outputMeta.GetLandmark(); - auto& scoreInfo = outputMeta.GetScore(); + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) { LOGE("output buffers named of %s or %s are NULL", - landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } int heatMapWidth = 0; int heatMapHeight = 0; int heatMapChannel = 0; - std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + std::vector channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll(); int number_of_landmarks = heatMapChannel; - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); - number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] - / landmarkInfo.GetOffset(); + number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] + / outputMeta.GetLandmarkOffset(); } else { - heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; - heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; - heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; + heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; + heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; } LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); @@ -1585,13 +1578,13 @@ namespace inference float inputW = 1.f; float inputH = 1.f; - if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { - inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); - inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) 
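/* pixel-coordinate landmarks are decoded relative to the input tensor size and scaled back to the source image afterwards */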
{ + inputW = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()); } - float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : - outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : + outputMeta.GetLandmarkHeatMapInfo().nmsRadius; poseDecoder.decode(inputW, inputH, thresRadius); @@ -1639,17 +1632,15 @@ namespace inference { LOGI("ENTER"); - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); auto poseResult = std::make_unique(); - if (outputMeta.IsParsed()) { - auto& landmarkInfo = outputMeta.GetLandmark(); - auto& scoreInfo = outputMeta.GetScore(); + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) { LOGE("output buffers named of %s or %s are NULL", - landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } @@ -1657,23 +1648,23 @@ namespace inference int heatMapHeight = 0; int heatMapChannel = 0; - if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; - heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; - heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + if (outputMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; + heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; + heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; } LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); - std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + std::vector channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll(); // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, // the landmarkChannel is guessed from the shape of the landmark output tensor. // Otherwise, it is guessed from the heatMapChannel. (heatMapChannel is used in default). int landmarkChannel = heatMapChannel; - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) - landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset(); + if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) + landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset(); poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? 
landmarkChannel : static_cast(mUserListName.size()); @@ -1700,11 +1691,11 @@ namespace inference float inputW = 1.f; float inputH = 1.f; - float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : - outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; - if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { - inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); - inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : + outputMeta.GetLandmarkHeatMapInfo().nmsRadius; + if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { + inputW = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()); } poseDecoder.decode(inputW, inputH, thresRadius); diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp index 2831edc..4d5e36e 100755 --- a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp @@ -27,16 +27,16 @@ namespace inference { int ObjectDecoder::init() { - if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) || - !mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) { + if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + if (!mTensorBuffer.exist(mMeta.GetLabelName()) || + !mTensorBuffer.exist(mMeta.GetNumberName()) ) { LOGE("buffer buffers named of %s or %s are NULL", - mMeta.GetLabel().GetName().c_str(), - mMeta.GetNumber().GetName().c_str()); + mMeta.GetLabelName().c_str(), mMeta.GetNumberName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; } - std::vector indexes = mMeta.GetNumber().GetDimInfo().GetValidIndexAll(); + std::vector indexes = mMeta.GetNumberDimInfo().GetValidIndexAll(); if (indexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -45,9 +45,9 @@ namespace inference // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS. // Otherwise it is set already within ctor. mNumberOfOjects = mTensorBuffer.getValue( - mMeta.GetNumber().GetName(), indexes[0]); + mMeta.GetNumberName(), indexes[0]); } else { - if (mBoxInfo.GetDecodeInfo().IsAnchorBoxEmpty()) { + if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) { LOGE("Anchor boxes are required but empty."); return MEDIA_VISION_ERROR_INVALID_OPERATION; } @@ -58,32 +58,32 @@ namespace inference float ObjectDecoder::decodeScore(int idx) { - float score = mTensorBuffer.getValue(mScoreInfo.GetName(), idx); - if (mScoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + float score = mTensorBuffer.getValue(mMeta.GetScoreName(), idx); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { score = PostProcess::sigmoid(score); } - return score < mScoreInfo.GetThresHold() ? 0.0f : score; + return score < mMeta.GetScoreThreshold() ? 
0.0f : score;
	}

	Box ObjectDecoder::decodeBox(int idx, float score, int label)
	{
		// assume type is (cx,cy,w,h)
		// left or cx
-		float cx = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[0]);
+		float cx = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[0]);
		// top or cy
-		float cy = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[1]);
+		float cy = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[1]);
		// right or width
-		float cWidth = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[2]);
+		float cWidth = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[2]);
		// bottom or height
-		float cHeight = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[3]);
+		float cHeight = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[3]);

		// convert type to ORIGIN_CENTER if ORIGIN_LEFTTOP
-		if (mBoxInfo.GetType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
+		if (mMeta.GetBoxType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
			float tmpCx = cx;
			float tmpCy = cy;
			cx = (cx + cWidth) * 0.5f; // (left + right)/2
@@ -93,7 +93,7 @@ namespace inference
		}

		// convert coordinate to RATIO if PIXEL
-		if (mBoxInfo.GetCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
+		if (mMeta.GetBoxCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
			cx /= mScaleW;
			cy /= mScaleH;
			cWidth /= mScaleW;
@@ -101,9 +101,9 @@ namespace inference
		}

		Box box = {
-			.index = mMeta.GetLabel().GetName().empty() ?
+			.index = mMeta.GetLabelName().empty() ?
					label :
-					mTensorBuffer.getValue(mMeta.GetLabel().GetName(), idx),
+					mTensorBuffer.getValue(mMeta.GetLabelName(), idx),
			.score = score,
			.location = cv::Rect2f(cx, cy, cWidth, cHeight)
		};
@@ -116,26 +116,26 @@ namespace inference
		// location coordinate of box, the output of decodeBox(), is relative between 0 ~ 1
		Box box = decodeBox(anchorIdx, score, idx);

-		if (mBoxInfo.GetDecodeInfo().IsFixedAnchorSize()) {
+		if (mMeta.GetBoxDecodeInfo().IsFixedAnchorSize()) {
			box.location.x += anchor.x;
			box.location.y += anchor.y;
		} else {
-			box.location.x = box.location.x / mBoxInfo.GetDecodeInfo().GetAnchorXscale() *
+			box.location.x = box.location.x / mMeta.GetBoxDecodeInfo().GetAnchorXscale() *
				anchor.width + anchor.x;
-			box.location.y = box.location.y / mBoxInfo.GetDecodeInfo().GetAnchorYscale() *
+			box.location.y = box.location.y / mMeta.GetBoxDecodeInfo().GetAnchorYscale() *
				anchor.height + anchor.y;
		}

-		if (mBoxInfo.GetDecodeInfo().IsExponentialBoxScale()) {
+		if (mMeta.GetBoxDecodeInfo().IsExponentialBoxScale()) {
			box.location.width = anchor.width *
-				std::exp(box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale());
+				std::exp(box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale());
			box.location.height = anchor.height *
-				std::exp(box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale());
+				std::exp(box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale());
		} else {
			box.location.width = anchor.width *
-				box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale();
+				box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale();
			box.location.height = anchor.height *
-				box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale();
+				box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale();
		}

		return box;
@@ -147,7 +147,7 @@ namespace inference
		int ret = MEDIA_VISION_ERROR_NONE;

		for (int idx = 0;
idx < mNumberOfOjects; ++idx) { - if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { float score = decodeScore(idx); if (score <= 0.0f) continue; @@ -158,7 +158,7 @@ namespace inference int anchorIdx = -1; Boxes boxes; - for (auto& anchorBox : mBoxInfo.GetDecodeInfo().GetAnchorBoxAll()) { + for (auto& anchorBox : mMeta.GetBoxDecodeInfo().GetAnchorBoxAll()) { anchorIdx++; float score = decodeScore(anchorIdx * mNumberOfOjects + idx); @@ -176,8 +176,8 @@ namespace inference if (!boxList.empty()) { PostProcess postProc; ret = postProc.Nms(boxList, - mBoxInfo.GetDecodeInfo().GetNmsMode(), - mBoxInfo.GetDecodeInfo().GetNmsIouThreshold(), + mMeta.GetBoxDecodeInfo().GetNmsMode(), + mMeta.GetBoxDecodeInfo().GetNmsIouThreshold(), mResultBoxes); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to non-maximum suppression[%d]", ret); diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 8dec322..176b0eb 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -35,6 +35,7 @@ namespace inference score(), box(), label(), + number(), landmark(), offsetVec(), dispVecs(), @@ -47,8 +48,8 @@ namespace inference name(), dimInfo(), threshold(0.0), - type(INFERENCE_SCORE_TYPE_NORMAL), topNumber(1), + type(INFERENCE_SCORE_TYPE_NORMAL), deQuantization(nullptr) { // Score type @@ -195,22 +196,22 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - void BoxInfo::DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) + void DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) { anchorBoxes.push_back(anchor); } - void BoxInfo::DecodeInfo::ClearAnchorBox() + void DecodeInfo::ClearAnchorBox() { anchorBoxes.clear(); } - std::vector& BoxInfo::DecodeInfo::GetAnchorBoxAll() + std::vector& DecodeInfo::GetAnchorBoxAll() { return anchorBoxes; } - bool BoxInfo::DecodeInfo::IsAnchorBoxEmpty() + bool DecodeInfo::IsAnchorBoxEmpty() { return anchorBoxes.empty(); } @@ -374,7 +375,7 @@ namespace inference } - int BoxInfo::DecodeInfo::ParseAnchorParam(JsonObject *root) + int DecodeInfo::ParseAnchorParam(JsonObject *root) { JsonObject *object = json_object_get_object_member(root, "anchor") ; @@ -420,45 +421,43 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - float BoxInfo::DecodeInfo::CalculateScale(float min, float max, int index, int maxStride) + float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride) { return min + (max - min) * 1.0 * index / (maxStride - 1.0f); } - bool BoxInfo::DecodeInfo::IsFixedAnchorSize() + bool DecodeInfo::IsFixedAnchorSize() { return this->anchorParam.isFixedAnchorSize;; } - bool BoxInfo::DecodeInfo::IsExponentialBoxScale() + bool DecodeInfo::IsExponentialBoxScale() { return this->anchorParam.isExponentialBoxScale; } - float BoxInfo::DecodeInfo::GetAnchorXscale() + float DecodeInfo::GetAnchorXscale() { return this->anchorParam.xScale; } - float BoxInfo::DecodeInfo::GetAnchorYscale() + float DecodeInfo::GetAnchorYscale() { return this->anchorParam.yScale; } - float BoxInfo::DecodeInfo::GetAnchorWscale() + float DecodeInfo::GetAnchorWscale() { return this->anchorParam.wScale; } - float BoxInfo::DecodeInfo::GetAnchorHscale() + float DecodeInfo::GetAnchorHscale() { return this->anchorParam.hScale; } - int BoxInfo::DecodeInfo::GenerateAnchor() + int DecodeInfo::GenerateAnchor() { - //BoxInfo::DecodeInfo& 
decodeInfo = box.GetDecodeInfo(); - if (this->anchorParam.strides.empty() || this->anchorParam.aspectRatios.empty()) { LOGE("Invalid anchor parameters"); @@ -552,7 +551,7 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::ParseNms(JsonObject *root) + int DecodeInfo::ParseNms(JsonObject *root) { if (!json_object_has_member(root, "nms")) { LOGI("nms is empty. skip it"); @@ -572,17 +571,17 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::GetNmsMode() + int DecodeInfo::GetNmsMode() { return this->nmsParam.mode; } - float BoxInfo::DecodeInfo::GetNmsIouThreshold() + float DecodeInfo::GetNmsIouThreshold() { return this->nmsParam.iouThreshold; } - int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root) + int DecodeInfo::ParseRotate(JsonObject *root) { if (!json_object_has_member(root, "rotate")) { LOGI("rotate is empty. skip it"); @@ -597,62 +596,62 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::GetRotStartPointIndex() + int DecodeInfo::GetRotStartPointIndex() { return this->rotParam.startPointIndex; } - int BoxInfo::DecodeInfo::GetRotEndPointIndex() + int DecodeInfo::GetRotEndPointIndex() { return this->rotParam.endPointIndex; } - float BoxInfo::DecodeInfo::GetBaseAngle() + float DecodeInfo::GetBaseAngle() { return this->rotParam.baseAngle; } - int BoxInfo::DecodeInfo::GetRoiMode() + int DecodeInfo::GetRoiMode() { return this->roiOptParam.mode; } - int BoxInfo::DecodeInfo::GetRoiStartPointIndex() + int DecodeInfo::GetRoiStartPointIndex() { return this->roiOptParam.startPointIndex; } - int BoxInfo::DecodeInfo::GetRoiEndPointIndex() + int DecodeInfo::GetRoiEndPointIndex() { return this->roiOptParam.endPointIndex; } - int BoxInfo::DecodeInfo::GetRoiCenterPointIndex() + int DecodeInfo::GetRoiCenterPointIndex() { return this->roiOptParam.centerPointIndex; } - float BoxInfo::DecodeInfo::GetShiftX() + float DecodeInfo::GetShiftX() { return this->roiOptParam.shiftX; } - float BoxInfo::DecodeInfo::GetShiftY() + float DecodeInfo::GetShiftY() { return this->roiOptParam.shiftY; } - float BoxInfo::DecodeInfo::GetScaleX() + float DecodeInfo::GetScaleX() { return this->roiOptParam.scaleX; } - float BoxInfo::DecodeInfo::GetScaleY() + float DecodeInfo::GetScaleY() { return this->roiOptParam.scaleY; } - int BoxInfo::DecodeInfo::ParseRoiOption(JsonObject *root) + int DecodeInfo::ParseRoiOption(JsonObject *root) { if (!json_object_has_member(root, "roi")) { LOGI("roi is empty. 
skip it"); @@ -672,56 +671,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - ScoreInfo& OutputMetadata::GetScore() - { - return score; - } - - BoxInfo& OutputMetadata::GetBox() - { - return box; - } - - Label& OutputMetadata::GetLabel() - { - return label; - } - - Number& OutputMetadata::GetNumber() - { - return number; - } - - Landmark& OutputMetadata::GetLandmark() - { - return landmark; - } - - OffsetVec& OutputMetadata::GetOffset() - { - return offsetVec; - } - - std::vector& OutputMetadata::GetDispVecAll() - { - return dispVecs; - } - - Edge& OutputMetadata::GetEdge() - { - return edgeMap; - } - - std::vector>& Edge::GetEdgesAll() - { - return edges; - } - - bool OutputMetadata::IsParsed() - { - return parsed; - } - Landmark::Landmark() : name(), dimInfo(), @@ -729,7 +678,7 @@ namespace inference offset(), coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), - decodingInfo() + heatMapInfo() { supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); @@ -805,9 +754,9 @@ namespace inference return decodingType; } - Landmark::DecodeInfo& Landmark::GetDecodingInfo() + HeatMapInfo& Landmark::GetHeatMapInfo() { - return decodingInfo; + return heatMapInfo; } int OutputMetadata::ParseLandmark(JsonObject *root) @@ -863,26 +812,26 @@ namespace inference JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; try { - landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); + landmark.GetHeatMapInfo().shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); } catch (const std::exception& e) { LOGE("Invalid %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } std::vector heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll(); - if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0]; - landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1]; - landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2]; + if (landmark.GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + landmark.GetHeatMapInfo().cIdx = heatMapIndexes[0]; + landmark.GetHeatMapInfo().hIdx = heatMapIndexes[1]; + landmark.GetHeatMapInfo().wIdx = heatMapIndexes[2]; } else { - landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[0]; - landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[1]; - landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[2]; + landmark.GetHeatMapInfo().hIdx = heatMapIndexes[0]; + landmark.GetHeatMapInfo().wIdx = heatMapIndexes[1]; + landmark.GetHeatMapInfo().cIdx = heatMapIndexes[2]; } if (json_object_has_member(object, "nms_radius")) { - landmark.GetDecodingInfo().heatMap.nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); - LOGI("nms is enabled with %3.f", landmark.GetDecodingInfo().heatMap.nmsRadius ); + landmark.GetHeatMapInfo().nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); + LOGI("nms is enabled with %3.f", landmark.GetHeatMapInfo().nmsRadius ); } } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 1ae33a7..a1efd2d 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -50,15 +50,13 @@ namespace inference { LOGI("ENTER"); - Landmark& 
landmarkInfo = mMeta.GetLandmark(); - - if (landmarkInfo.GetType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { LOGE("Not supported landmark type"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("Skip init"); return MEDIA_VISION_ERROR_NONE; } @@ -68,12 +66,11 @@ namespace inference float score, localScore; int idx; bool isLocalMax; - ScoreInfo& scoreInfo = mMeta.GetScore(); mCandidates.clear(); - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mCandidates.resize(mHeatMapChannel); } @@ -83,16 +80,16 @@ namespace inference for (c = 0; c < mHeatMapChannel; ++c, candidate++) { isLocalMax = true; idx = convertXYZtoX(x, y, c); - score = mTensorBuffer.getValue(scoreInfo.GetName(), idx); - if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + score = mTensorBuffer.getValue(mMeta.GetScoreName(), idx); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { score = PostProcess::sigmoid(score); } - if (score < scoreInfo.GetThresHold()) + if (score < mMeta.GetScoreThreshold()) continue; - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { if (score <= candidate->score) continue; @@ -110,8 +107,8 @@ namespace inference for (dy = sy; dy < ey; ++dy) { for (dx = sx; dx < ex; ++dx) { idx = convertXYZtoX(dx, dy, c); - localScore = mTensorBuffer.getValue(scoreInfo.GetName(), idx); - if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + localScore = mTensorBuffer.getValue(mMeta.GetScoreName(), idx); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { localScore = PostProcess::sigmoid(localScore); } if (localScore > score) { @@ -159,7 +156,7 @@ namespace inference int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal) { - if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) { + if (!mTensorBuffer.exist(mMeta.GetOffsetVecName())) { offsetVal.x = offsetVal.y = 0.f; LOGI("No offset value"); LOGI("LEAVE"); @@ -169,11 +166,11 @@ namespace inference cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id); try { - offsetVal.x = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.x); - offsetVal.y = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.y); + offsetVal.x = mTensorBuffer.getValue(mMeta.GetOffsetVecName(), idx.x); + offsetVal.y = mTensorBuffer.getValue(mMeta.GetOffsetVecName(), idx.y); } catch (const std::exception& e) { LOGE("Fail to get value at (%d, %d) from %s", - idx.x, idx.y, mMeta.GetOffset().GetName().c_str()); + idx.x, idx.y, mMeta.GetOffsetVecName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } @@ -241,30 +238,28 @@ namespace inference } mPoseLandmarks.clear(); - LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false}; - Landmark& landmarkInfo = mMeta.GetLandmark(); - 
ScoreInfo& scoreInfo = mMeta.GetScore(); + LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false}; - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mPoseLandmarks.resize(1); - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); } else { mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); } } - if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { while (!mCandidates.empty()) { LandmarkPoint &root = mCandidates.front(); getIndexToPos(root, scaleWidth, scaleHeight); - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { root.valid = true; mPoseLandmarks[0].landmarks[root.id] = root; mPoseLandmarks[0].score += root.score; @@ -318,27 +313,27 @@ namespace inference } } else { // multi pose is not supported - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); - float poseScore = scoreInfo.GetThresHold(); + std::vector scoreIndexes = mMeta.GetScoreDimInfo().GetValidIndexAll(); + float poseScore = mMeta.GetScoreThreshold(); if (!scoreIndexes.empty()) { - poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); - if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + poseScore = mTensorBuffer.getValue(mMeta.GetScoreName(), scoreIndexes[scoreIndexes[0]]); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { poseScore = PostProcess::sigmoid(poseScore); } - if (poseScore < scoreInfo.GetThresHold()) { - LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, scoreInfo.GetThresHold()); + if (poseScore < mMeta.GetScoreThreshold()) { + LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, mMeta.GetScoreThreshold()); return MEDIA_VISION_ERROR_NONE; } } - int landmarkOffset = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3; - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - landmarkOffset = landmarkInfo.GetOffset(); + int landmarkOffset = (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 
2 : 3;
+		if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+			landmarkOffset = mMeta.GetLandmarkOffset();
		}

		for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
-			float px = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset);
-			float py = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+			float px = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset);
+			float py = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);

			mPoseLandmarks[0].landmarks[idx].score = poseScore;
			mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
@@ -371,9 +366,9 @@ namespace inference
		LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
		LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);

-		int index = static_cast(mMeta.GetEdge().GetEdgesAll().size()) - 1;
-		for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
-			riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+		int index = static_cast(mMeta.GetEdges().size()) - 1;
+		for (auto riter = mMeta.GetEdges().rbegin();
+			riter != mMeta.GetEdges().rend(); ++riter) {
			int fromKeyId = riter->second;
			int toKeyId = riter->first;
@@ -392,8 +387,8 @@
		}

		index = 0;
-		for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
-			iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+		for (auto iter = mMeta.GetEdges().begin();
+			iter != mMeta.GetEdges().end(); ++iter) {
			int fromKeyId = iter->first;
			int toKeyId = iter->second;
@@ -449,8 +444,8 @@
		}

		int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
-		toLandmark.score = mTensorBuffer.getValue(mMeta.GetScore().GetName(), idx);
-		if (mMeta.GetScore().GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+		toLandmark.score = mTensorBuffer.getValue(mMeta.GetScoreName(), idx);
+		if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
			toLandmark.score = PostProcess::sigmoid(toLandmark.score);
		}
@@ -464,12 +459,14 @@
	{
		LOGI("ENTER");

-		LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
+		LOGI("edge size: %zd", mMeta.GetEdges().size());
+
		int idxY = index.y * mHeatMapWidth
-			* static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2;
-		idxY += index.x * static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+			* static_cast(mMeta.GetEdges().size()) * 2;
+
+		idxY += index.x * static_cast(mMeta.GetEdges().size()) * 2 + edgeId;

-		int idxX = idxY + static_cast(mMeta.GetEdge().GetEdgesAll().size());
+		int idxX = idxY + static_cast(mMeta.GetEdges().size());

		for(auto& dispVec : mMeta.GetDispVecAll()){
			if (dispVec.GetType() == type) { // 0: forward
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index ee7e44e..205f377 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
 Name: capi-media-vision
 Summary: Media Vision library for Tizen Native API
-Version: 0.8.17
+Version: 0.8.18
 Release: 1
 Group: Multimedia/Framework
 License: Apache-2.0 and BSD-3-Clause
--
2.7.4


From 20caea0dc7ad4eb0297da672601b13452717d498 Mon Sep 17 00:00:00 2001
From: Inki Dae
Date: Wed, 13 Oct 2021 18:55:41 +0900
Subject: [PATCH 04/16] mv_machine_learning: code cleanup to Inference class

Cleaned up the Inference class. What this patch does:
- code sliding.
- change variable names to meaningful ones, such as 'n' to 'output_idx'.
- drop unnecessary variables.
- use same parameter name for Get*Results functions. Change-Id: I47ac3eb241116174e4a6a7bc2a1b90ab9378de25 Signed-off-by: Inki Dae --- .../mv_inference/inference/include/Inference.h | 6 +- .../mv_inference/inference/src/Inference.cpp | 138 +++++++++------------ .../inference/src/mv_inference_open.cpp | 26 ++-- 3 files changed, 75 insertions(+), 95 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index a0506f2..95f285f 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -293,7 +293,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetClassficationResults(ImageClassificationResults &classificationResults); + int GetClassficationResults(ImageClassificationResults *results); /** * @brief Gets the ObjectDetectioResults @@ -301,7 +301,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetObjectDetectionResults(ObjectDetectionResults *detectionResults); + int GetObjectDetectionResults(ObjectDetectionResults *results); /** * @brief Gets the FaceDetectioResults @@ -309,7 +309,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetFaceDetectionResults(FaceDetectionResults *detectionResults); + int GetFaceDetectionResults(FaceDetectionResults *results); /** * @brief Gets the FacialLandmarkDetectionResults diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 05c9c07..d79b3ff 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1091,14 +1091,12 @@ namespace inference return mSupportedInferenceBackend[backend]; } - int Inference::GetClassficationResults(ImageClassificationResults &results) + int Inference::GetClassficationResults(ImageClassificationResults *results) { // Will contain top N results in ascending order. 
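	// As a side note, the same top-N selection idea can be written compactly
	// with std::partial_sort. A minimal, self-contained sketch, assuming a
	// plain float array `scores` of length `count` and hypothetical
	// `threshold` and `topN` values (these names are illustrative, not taken
	// from this codebase):
	//
	//   #include <algorithm>
	//   #include <functional>
	//   #include <utility>
	//   #include <vector>
	//
	//   std::vector<std::pair<float, int>> top;  // (confidence, class index)
	//   for (int i = 0; i < count; ++i)
	//       if (scores[i] >= threshold)
	//           top.emplace_back(scores[i], i);
	//   const size_t n = std::min<size_t>(topN, top.size());
	//   std::partial_sort(top.begin(), top.begin() + n, top.end(),
	//                     std::greater<>());  // descending by confidence
	//   top.resize(n);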
std::vector> topScore; auto threadHold = mConfig.mConfidenceThresHold; - results.number_of_classes = 0; - if (mMetadata.GetOutputMeta().IsParsed()) { OutputMetadata outputMetadata = mMetadata.GetOutputMeta(); std::vector indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll(); @@ -1191,23 +1189,25 @@ namespace inference std::reverse(topScore.begin(), topScore.end()); } + results->number_of_classes = 0; + for (auto& score : topScore) { LOGI("score: %.3f, threshold: %.3f", score.first, threadHold); LOGI("idx:%d", score.second); LOGI("classProb: %.3f", score.first); - results.indices.push_back(score.second); - results.confidences.push_back(score.first); - results.names.push_back(mUserListName[score.second]); - results.number_of_classes++; + results->indices.push_back(score.second); + results->confidences.push_back(score.first); + results->names.push_back(mUserListName[score.second]); + results->number_of_classes++; } - LOGE("Inference: GetClassificationResults: %d\n", results.number_of_classes); + LOGE("Inference: GetClassificationResults: %d\n", results->number_of_classes); return MEDIA_VISION_ERROR_NONE; } int Inference::GetObjectDetectionResults( - ObjectDetectionResults *detectionResults) + ObjectDetectionResults *results) { if (mMetadata.GetOutputMeta().IsParsed()) { OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); @@ -1222,6 +1222,7 @@ namespace inference int boxOffset = 0; int numberOfObjects = 0; + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { @@ -1252,23 +1253,21 @@ namespace inference objDecoder.init(); objDecoder.decode(); - ObjectDetectionResults results; - results.number_of_objects = 0; + results->number_of_objects = 0; for (auto& box : objDecoder.getObjectAll()) { - results.indices.push_back(box.index); - results.names.push_back(mUserListName[box.index]); - results.confidences.push_back(box.score); - results.locations.push_back(cv::Rect( + results->indices.push_back(box.index); + results->names.push_back(mUserListName[box.index]); + results->confidences.push_back(box.score); + results->locations.push_back(cv::Rect( static_cast((box.location.x - box.location.width * 0.5f) * static_cast(mSourceSize.width)), static_cast((box.location.y - box.location.height * 0.5f) * static_cast(mSourceSize.height)), static_cast(box.location.width * static_cast(mSourceSize.width)), static_cast(box.location.height * static_cast(mSourceSize.height)))); - results.number_of_objects++; + results->number_of_objects++; } - *detectionResults = results; - LOGI("Inference: GetObjectDetectionResults: %d\n", - results.number_of_objects); + + LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects); } else { tensor_t outputData; @@ -1288,7 +1287,6 @@ namespace inference float *scores = nullptr; int number_of_detections = 0; - cv::Mat cvScores, cvClasses, cvBoxes; if (outputData.dimInfo.size() == 1) { // there is no way to know how many objects are detect unless the number of objects aren't // provided. In the case, each backend should provide the number of results manually. 
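	// For reference, this single-tensor legacy path assumes OpenCV-DNN-style
	// SSD output: one row of seven floats per detection. A sketch of walking
	// that layout, assuming (from the column indexes used below) the row
	// order [image_id, class_id, score, left, top, right, bottom]:
	//
	//   #include <vector>
	//   #include <opencv2/core.hpp>
	//
	//   struct Detection { int classId; float score; cv::Rect2f box; };
	//
	//   std::vector<Detection> parseRows(const float *rows, int count)
	//   {
	//       std::vector<Detection> out;
	//       for (int i = 0; i < count; ++i) {
	//           const float *r = rows + i * 7;  // one 7-float row per detection
	//           out.push_back({ static_cast<int>(r[1]), r[2],
	//                           cv::Rect2f(r[3], r[4], r[5] - r[3], r[6] - r[4]) });
	//       }
	//       return out;
	//   }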
@@ -1307,8 +1305,9 @@ namespace inference cv::Mat cvTop = cvOutputData.col(4).clone(); cv::Mat cvRight = cvOutputData.col(5).clone(); cv::Mat cvBottom = cvOutputData.col(6).clone(); - + cv::Mat cvScores, cvClasses, cvBoxes; cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; + cv::hconcat(cvBoxElems, 4, cvBoxes); // classes @@ -1320,42 +1319,38 @@ namespace inference boxes = cvBoxes.ptr(0); classes = cvClasses.ptr(0); scores = cvScores.ptr(0); - } else { boxes = reinterpret_cast(outputData.data[0]); classes = reinterpret_cast(outputData.data[1]); scores = reinterpret_cast(outputData.data[2]); - number_of_detections = - (int) (*reinterpret_cast(outputData.data[3])); + number_of_detections = (int) (*reinterpret_cast(outputData.data[3])); } LOGI("number_of_detections = %d", number_of_detections); - int left, top, right, bottom; - cv::Rect loc; + results->number_of_objects = 0; - ObjectDetectionResults results; - results.number_of_objects = 0; for (int idx = 0; idx < number_of_detections; ++idx) { if (scores[idx] < mThreshold) continue; - left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); - top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); - right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); - bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + int left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); + int top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); + int right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); + int bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + cv::Rect loc; loc.x = left; loc.y = top; loc.width = right - left + 1; loc.height = bottom - top + 1; - results.indices.push_back(static_cast(classes[idx])); - results.confidences.push_back(scores[idx]); - results.names.push_back( + results->indices.push_back(static_cast(classes[idx])); + results->confidences.push_back(scores[idx]); + results->names.push_back( mUserListName[static_cast(classes[idx])]); - results.locations.push_back(loc); - results.number_of_objects++; + results->locations.push_back(loc); + results->number_of_objects++; LOGI("objectClass: %d", static_cast(classes[idx])); LOGI("confidence:%f", scores[idx]); @@ -1363,16 +1358,13 @@ namespace inference bottom); } - *detectionResults = results; - LOGI("Inference: GetObjectDetectionResults: %d\n", - results.number_of_objects); + LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects); } return MEDIA_VISION_ERROR_NONE; } - int - Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults) + int Inference::GetFaceDetectionResults(FaceDetectionResults *results) { if (mMetadata.GetOutputMeta().IsParsed()) { OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); @@ -1387,6 +1379,7 @@ namespace inference int boxOffset = 0; int numberOfFaces = 0; + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { @@ -1417,21 +1410,19 @@ namespace inference objDecoder.init(); objDecoder.decode(); - FaceDetectionResults results; - results.number_of_faces = 0; + results->number_of_faces = 0; for (auto& face : objDecoder.getObjectAll()) { - results.confidences.push_back(face.score); - results.locations.push_back(cv::Rect( + results->confidences.push_back(face.score); + results->locations.push_back(cv::Rect( static_cast((face.location.x - face.location.width * 0.5f) * static_cast(mSourceSize.width)), static_cast((face.location.y - 
face.location.height * 0.5f) * static_cast(mSourceSize.height)), static_cast(face.location.width * static_cast(mSourceSize.width)), static_cast(face.location.height * static_cast(mSourceSize.height)))); - results.number_of_faces++; + results->number_of_faces++; } - *detectionResults = results; - LOGE("Inference: GetFaceDetectionResults: %d\n", - results.number_of_faces); + + LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces); } else { tensor_t outputData; @@ -1450,8 +1441,8 @@ namespace inference float *classes = nullptr; float *scores = nullptr; int number_of_detections = 0; - cv::Mat cvScores, cvClasses, cvBoxes; + if (outputData.dimInfo.size() == 1) { // there is no way to know how many objects are detect unless the number of objects aren't // provided. In the case, each backend should provide the number of results manually. @@ -1460,17 +1451,14 @@ namespace inference // indicates the image id. But it is useless if a batch mode isn't supported. // So, use the 1st of 7. - number_of_detections = static_cast( - *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], - CV_32F, outputData.data[0]); + number_of_detections = static_cast(*reinterpret_cast(outputData.data[0])); + cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]); // boxes cv::Mat cvLeft = cvOutputData.col(3).clone(); cv::Mat cvTop = cvOutputData.col(4).clone(); cv::Mat cvRight = cvOutputData.col(5).clone(); cv::Mat cvBottom = cvOutputData.col(6).clone(); - cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; cv::hconcat(cvBoxElems, 4, cvBoxes); @@ -1483,49 +1471,41 @@ namespace inference boxes = cvBoxes.ptr(0); classes = cvClasses.ptr(0); scores = cvScores.ptr(0); - } else { boxes = reinterpret_cast(outputData.data[0]); classes = reinterpret_cast(outputData.data[1]); scores = reinterpret_cast(outputData.data[2]); - number_of_detections = static_cast( - *reinterpret_cast(outputData.data[3])); + number_of_detections = static_cast(*reinterpret_cast(outputData.data[3])); } - int left, top, right, bottom; - cv::Rect loc; + results->number_of_faces = 0; - FaceDetectionResults results; - results.number_of_faces = 0; for (int idx = 0; idx < number_of_detections; ++idx) { if (scores[idx] < mThreshold) continue; - left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); - top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); - right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); - bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + int left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); + int top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); + int right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); + int bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + cv::Rect loc; loc.x = left; loc.y = top; loc.width = right - left + 1; loc.height = bottom - top + 1; - - results.confidences.push_back(scores[idx]); - results.locations.push_back(loc); - results.number_of_faces++; + results->confidences.push_back(scores[idx]); + results->locations.push_back(loc); + results->number_of_faces++; LOGI("confidence:%f", scores[idx]); LOGI("class: %f", classes[idx]); LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1], boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]); - LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, - bottom); + LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom); } - *detectionResults 
= results; - LOGE("Inference: GetFaceDetectionResults: %d\n", - results.number_of_faces); + LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces); } return MEDIA_VISION_ERROR_NONE; @@ -1699,12 +1679,11 @@ namespace inference } poseDecoder.decode(inputW, inputH, thresRadius); - - int part = 0; poseResult->number_of_poses = poseDecoder.getNumberOfPose(); + for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) { for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; ++ landmarkIndex) { - part = landmarkIndex; + int part = landmarkIndex; if (!mUserListName.empty()) { part = std::stoi(mUserListName[landmarkIndex]) - 1; if (part < 0) { @@ -1773,6 +1752,7 @@ namespace inference loc2f.x = (static_cast(loc.x) / ratioX); loc2f.y = (static_cast(loc.y) / ratioY); + LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]", landmarkIndex, part, loc2f.x, loc2f.y, score); diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index c945ac4..a9fd490 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -638,7 +638,7 @@ int mv_inference_image_classify_open( ImageClassificationResults classificationResults; - ret = pInfer->GetClassficationResults(classificationResults); + ret = pInfer->GetClassficationResults(&classificationResults); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to get inference results"); return ret; @@ -699,14 +699,14 @@ int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer, locations.resize(numberOfOutputs); } - for (int n = 0; n < numberOfOutputs; ++n) { - LOGE("names: %s", objectDetectionResults.names[n].c_str()); - names[n] = objectDetectionResults.names[n].c_str(); + for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) { + LOGE("names: %s", objectDetectionResults.names[output_idx].c_str()); + names[output_idx] = objectDetectionResults.names[output_idx].c_str(); - locations[n].point.x = objectDetectionResults.locations[n].x; - locations[n].point.y = objectDetectionResults.locations[n].y; - locations[n].width = objectDetectionResults.locations[n].width; - locations[n].height = objectDetectionResults.locations[n].height; + locations[output_idx].point.x = objectDetectionResults.locations[output_idx].x; + locations[output_idx].point.y = objectDetectionResults.locations[output_idx].y; + locations[output_idx].width = objectDetectionResults.locations[output_idx].width; + locations[output_idx].height = objectDetectionResults.locations[output_idx].height; } int *indices = objectDetectionResults.indices.data(); @@ -745,11 +745,11 @@ int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, int numberOfOutputs = faceDetectionResults.number_of_faces; std::vector locations(numberOfOutputs); - for (int n = 0; n < numberOfOutputs; ++n) { - locations[n].point.x = faceDetectionResults.locations[n].x; - locations[n].point.y = faceDetectionResults.locations[n].y; - locations[n].width = faceDetectionResults.locations[n].width; - locations[n].height = faceDetectionResults.locations[n].height; + for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) { + locations[output_idx].point.x = faceDetectionResults.locations[output_idx].x; + locations[output_idx].point.y = faceDetectionResults.locations[output_idx].y; + locations[output_idx].width = 
faceDetectionResults.locations[output_idx].width;
+		locations[output_idx].height = faceDetectionResults.locations[output_idx].height;
	}

	float *confidences = faceDetectionResults.confidences.data();
--
2.7.4


From 041a814164e95cb1adbd18485c66ed3be2103dfc Mon Sep 17 00:00:00 2001
From: Inki Dae
Date: Thu, 14 Oct 2021 15:43:59 +0900
Subject: [PATCH 05/16] mv_machine_learning: code refactoring to OutputMetadata module

The OutputMetadata.h and .cpp files contain many classes and related
code, which makes them hard to maintain. The biggest change in this
refactoring is to split the classes bundled in the OutputMetadata files
into a separate file for each class. It also changes types that do not
need to be classes into structs, together with several cleanups such as
code sliding, renaming, and dropping unnecessary code.

Change-Id: I0ce677d333ce3a3e7212f7d26a20b6cf77bc7a9a
Signed-off-by: Inki Dae
---
 .../mv_inference/inference/include/BoxInfo.h       | 131 +++++++
 .../mv_inference/inference/include/DecodeInfo.h    | 156 ++++++++
 .../mv_inference/inference/include/DimInfo.h       |  52 +++
 .../mv_inference/inference/include/DispVec.h       |  93 +++++
 .../mv_inference/inference/include/Edge.h          |  68 ++++
 .../mv_inference/inference/include/InputMetadata.h |   3 -
 .../mv_inference/inference/include/Landmark.h      | 127 ++++++-
 .../mv_inference/inference/include/OffsetVec.h     |  84 +++++
 .../inference/include/OutputMetadata.h             | 329 +---------------
 .../mv_inference/inference/include/ScoreInfo.h     | 129 +++++++
 .../mv_inference/inference/include/Utils.h         |  46 +++
 .../mv_inference/inference/src/Inference.cpp       |   4 +-
 .../mv_inference/inference/src/InputMetadata.cpp   |  15 +-
 .../mv_inference/inference/src/OutputMetadata.cpp  | 419 ++-------------------
 packaging/capi-media-vision.spec                   |   2 +-
 15 files changed, 938 insertions(+), 720 deletions(-)
 create mode 100644 mv_machine_learning/mv_inference/inference/include/BoxInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/DecodeInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/DimInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/DispVec.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/Edge.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/OffsetVec.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/ScoreInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/Utils.h

diff --git a/mv_machine_learning/mv_inference/inference/include/BoxInfo.h b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h
new file mode 100644
index 0000000..ceffaa0
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h
@@ -0,0 +1,131 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __BOX_INFO_H__ +#define __BOX_INFO_H__ + +#include +#include +#include +#include + +#include +#include + +namespace mediavision +{ +namespace inference +{ +namespace box +{ + class BoxInfo + { + private: + std::string name; + DimInfo dimInfo; + inference_box_type_e type; // 0:L-T-R-B, 1: Cx-Cy-W-H + std::vector order; // Order based on box type + inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel + inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor + DecodeInfo decodingInfo; + + std::map supportedBoxTypes; + std::map supportedBoxCoordinateTypes; + std::map supportedBoxDecodingTypes; + + public: + BoxInfo() : + name(), + dimInfo(), + type(INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP), + order(), + coordinate(INFERENCE_BOX_COORDINATE_TYPE_RATIO), + decodingType(INFERENCE_BOX_DECODING_TYPE_BYPASS), + decodingInfo() + + { + supportedBoxTypes.insert({"ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP}); + supportedBoxTypes.insert({"ORIGIN_CENTER", INFERENCE_BOX_TYPE_ORIGIN_CENTER}); + + supportedBoxCoordinateTypes.insert({"RATIO", INFERENCE_BOX_COORDINATE_TYPE_RATIO}); + supportedBoxCoordinateTypes.insert({"PIXEL", INFERENCE_BOX_COORDINATE_TYPE_PIXEL}); + + supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS}); + supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR}); + } + + ~BoxInfo() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + inference_box_type_e GetType() { return type; } + inference_box_decoding_type_e GetDecodingType() { return decodingType; } + std::vector GetOrder() { return order; } + int GetCoordinate() { return coordinate; } + DecodeInfo& GetDecodeInfo() {return decodingInfo; } + + int ParseBox(JsonObject *root) + { + LOGI("ENTER"); + + JsonArray * rootArray = json_object_get_array_member(root, "box"); + unsigned int elements = json_array_get_length(rootArray); + + for (unsigned int elem_idx = 0; elem_idx < elements; ++elem_idx) { + JsonNode *pNode = json_array_get_element(rootArray, elem_idx); + JsonObject *pObject = json_node_get_object(pNode); + + name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + + LOGI("range dim: size[%u]", elements2); + + for (unsigned int elem2_idx = 0; elem2_idx < elements2; ++elem2_idx) + if (static_cast(json_array_get_int_element(array, elem2_idx)) == 1) + dimInfo.SetValidIndex(elem2_idx); + + try { + type = GetSupportedType(pObject, "box_type", supportedBoxTypes); + coordinate = GetSupportedType(pObject, "box_coordinate", supportedBoxCoordinateTypes); + decodingType = GetSupportedType(pObject, "decoding_type", supportedBoxDecodingTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + array = json_object_get_array_member(pObject, "box_order"); + elements2 = json_array_get_length(array); + LOGI("box order should have 4 elements and it has [%u]", elements2); + + for (unsigned int elem2_idx = 0; elem2_idx < elements2; ++elem2_idx) { + auto val = static_cast(json_array_get_int_element(array, elem2_idx)); + order.push_back(val); + LOGI("%d", val); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + }; +} /* box */ +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git 
a/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h new file mode 100644 index 0000000..a872c3a --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h @@ -0,0 +1,156 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DECODE_INFO_H__ +#define __DECODE_INFO_H__ + +#include +#include +#include +#include + +#include +#include + +namespace mediavision +{ +namespace inference +{ +namespace box +{ + struct AnchorParam { + int mode; /**< 0: generate anchor, 1:load pre-anchor*/ + int numLayers; + float minScale; + float maxScale; + int inputSizeHeight; + int inputSizeWidth; + float anchorOffsetX; + float anchorOffsetY; + std::vector strides; + std::vector aspectRatios; + bool isReduceBoxedInLowestLayer; + float interpolatedScaleAspectRatio; + bool isFixedAnchorSize; + bool isExponentialBoxScale; + float xScale; + float yScale; + float wScale; + float hScale; + }; + + struct NMSParam { + inference_box_nms_type_e mode; /**< 0: standard */ + float iouThreshold; + std::map supportedBoxNmsTypes; + }; + + struct RotateParam { + int startPointIndex; + int endPointIndex; + cv::Point2f startPoint; + cv::Point2f endPoint; + float baseAngle; + }; + + struct RoiOptionParam { + int startPointIndex; + int endPointIndex; + int centerPointIndex; + cv::Point2f centerPoint; + float shiftX; + float shiftY; + float scaleX; + float scaleY; + int mode; + }; + + class DecodeInfo { + private: + AnchorParam anchorParam; + std::vector anchorBoxes; + NMSParam nmsParam; + RotateParam rotParam; + RoiOptionParam roiOptParam; + + public: + DecodeInfo() { + nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; + nmsParam.iouThreshold = 0.2f; + nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); + + rotParam.startPointIndex = -1; + rotParam.endPointIndex = -1; + rotParam.startPoint = cv::Point2f(0.f,0.f); + rotParam.endPoint = cv::Point2f(0.f,0.f); + rotParam.baseAngle = 0.f; + + roiOptParam.startPointIndex = -1; + roiOptParam.endPointIndex = -1; + roiOptParam.centerPointIndex = -1; + roiOptParam.centerPoint = cv::Point2f(0.f, 0.f); + roiOptParam.shiftX = 0.f; + roiOptParam.shiftY = 0.f; + roiOptParam.scaleX = 1.f; + roiOptParam.scaleY = 1.f; + roiOptParam.mode = -1; + } + + ~DecodeInfo() = default; + + std::vector& GetAnchorBoxAll(); + bool IsAnchorBoxEmpty(); + void AddAnchorBox(cv::Rect2f& ahcnor); + void ClearAnchorBox(); + + // Anchor param + int ParseAnchorParam(JsonObject *root); + int GenerateAnchor(); + bool IsFixedAnchorSize(); + bool IsExponentialBoxScale(); + float GetAnchorXscale(); + float GetAnchorYscale(); + float GetAnchorWscale(); + float GetAnchorHscale(); + float CalculateScale(float min, float max, int index, int maxStride); + + // Nms param + int ParseNms(JsonObject *root); + int GetNmsMode(); + float GetNmsIouThreshold(); + + // Rotate param + int ParseRotate(JsonObject 
*root); + int GetRotStartPointIndex(); + int GetRotEndPointIndex(); + float GetBaseAngle(); + + // Roi option param + int ParseRoiOption(JsonObject *root); + int GetRoiMode(); + int GetRoiCenterPointIndex(); + int GetRoiStartPointIndex(); + int GetRoiEndPointIndex(); + float GetShiftX(); + float GetShiftY(); + float GetScaleX(); + float GetScaleY(); + }; +} /* box */ +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/DimInfo.h b/mv_machine_learning/mv_inference/inference/include/DimInfo.h new file mode 100644 index 0000000..d061122 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/DimInfo.h @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DIM_INFO_H__ +#define __DIM_INFO_H__ + +#include + +namespace mediavision +{ +namespace inference +{ + class DimInfo + { + private: + std::vector dims; + + public: + std::vector GetValidIndexAll() const + { + LOGI("ENTER"); + + LOGI("LEAVE"); + return dims; + } + + void SetValidIndex(int index) + { + LOGI("ENTER"); + + dims.push_back(index); + + LOGI("LEAVE"); + } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/DispVec.h b/mv_machine_learning/mv_inference/inference/include/DispVec.h new file mode 100644 index 0000000..f43dcf5 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/DispVec.h @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DISP_VEC_H__ +#define __DISP_VEC_H__ + +#include +#include +#include +#include + +#include +#include "DimInfo.h" +#include "Utils.h" + +namespace mediavision +{ +namespace inference +{ + class DispVec + { + private: + std::string name; + DimInfo dimInfo; + inference_displacement_type_e type; + int shapeType; + std::map supportedDispTypes; + + public: + DispVec() : + name(), + dimInfo(), + type(INFERENCE_DISPLACEMENT_TYPE_FORWARD), + shapeType(INFERENCE_TENSOR_SHAPE_NCHW) + { + supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD}); + supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD}); + } + + ~DispVec() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + inference_displacement_type_e GetType() { return type; } + int GetShapeType() { return shapeType; } + + int ParseDisplacement(JsonObject *root, const std::map& supportedShapeType) + { + LOGI("ENTER"); + + name = static_cast(json_object_get_string_member(root,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(root, "index"); + unsigned int elements2 = json_array_get_length(array); + + LOGI("range dim: size[%u]", elements2); + + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if(static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + try { + shapeType = GetSupportedType(root, "shape_type", supportedShapeType); + type = GetSupportedType(root, "type", supportedDispTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/Edge.h b/mv_machine_learning/mv_inference/inference/include/Edge.h new file mode 100644 index 0000000..80c0216 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Edge.h @@ -0,0 +1,68 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __EDGE_H__ +#define __EDGE_H__ + +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + class Edge + { + private: + std::vector> edges; + + public: + Edge() = default; + + ~Edge() = default; + + int ParseEdge(JsonObject *root) + { + LOGI("ENTER"); + + JsonArray * rootArray = json_object_get_array_member(root, "edgemap"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + int pEdgeNode, cEdgeNode; + + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + pEdgeNode = json_object_get_int_member(pObject, "parent"); + cEdgeNode = json_object_get_int_member(pObject, "child"); + + edges.push_back(std::make_pair(pEdgeNode, cEdgeNode)); + LOGI("%ud: parent - child: %d - %d", elem, pEdgeNode, cEdgeNode); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + std::vector>& GetEdgesAll() { return edges; } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h index bdd12c2..c2bf94d 100644 --- a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h @@ -121,9 +121,6 @@ namespace inference std::map layer; std::map option; - template - static T GetSupportedType(JsonObject* root, std::string typeName, - std::map& supportedTypes); int GetTensorInfo(JsonObject* root); int GetPreProcess(JsonObject* root); diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index 63ccf60..2fe6c9a 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __MEDIA_VISION_LANDMARK_H__ -#define __MEDIA_VISION_LANDMARK_H__ +#ifndef __LANDMARK_H__ +#define __LANDMARK_H__ #include #include @@ -47,7 +47,128 @@ namespace inference float score; } LandmarkResults; + typedef struct _HeatMapInfo { + int wIdx; + int hIdx; + int cIdx; + float nmsRadius; + inference_tensor_shape_type_e shapeType; + } HeatMapInfo; + + class Landmark + { + private: + std::string name; + DimInfo dimInfo; + inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */ + int offset; + inference_landmark_coorindate_type_e coordinate; /**< 0: RATIO, 1: PIXEL */ + inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, + 1: decoding heatmap, + 2: decoding heatmap with refinement */ + HeatMapInfo heatMapInfo; + + std::map supportedLandmarkTypes; + std::map supportedLandmarkCoordinateTypes; + std::map supportedLandmarkDecodingTypes; + + public: + + Landmark() : + name(), + dimInfo(), + type(INFERENCE_LANDMARK_TYPE_2D_SINGLE), + offset(), + coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), + decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), + heatMapInfo() + + { + supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); + supportedLandmarkTypes.insert({"2D_MULTI", INFERENCE_LANDMARK_TYPE_2D_MULTI}); + supportedLandmarkTypes.insert({"3D_SINGLE", INFERENCE_LANDMARK_TYPE_3D_SINGLE}); + + supportedLandmarkCoordinateTypes.insert({"RATIO", INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO}); + supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); + + supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); + supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); + supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); + } + + ~Landmark() = default; + + int ParseLandmark(JsonObject *root) + { + // box + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + try { + type = GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes); + coordinate = GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes); + decodingType = GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); + LOGI("landmark offset: %d", offset); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + inference_landmark_type_e GetType() + { + return type; + } + + int GetOffset() + { + return offset; + } + + inference_landmark_coorindate_type_e GetCoordinate() + { + return coordinate; + } + + 
inference_landmark_decoding_type_e GetDecodingType() + { + return decodingType; + } + + HeatMapInfo& GetHeatMapInfo() + { + return heatMapInfo; + } + + std::string GetName() { return name; } + + DimInfo GetDimInfo() { return dimInfo; } + }; } /* Inference */ } /* MediaVision */ -#endif /* __MEDIA_VISION_LANDMARK_H__ */ +#endif /* __LANDMARK_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/OffsetVec.h b/mv_machine_learning/mv_inference/inference/include/OffsetVec.h new file mode 100644 index 0000000..c5fe30b --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/OffsetVec.h @@ -0,0 +1,84 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OFFSET_VEC_H__ +#define __OFFSET_VEC_H__ + +#include +#include +#include + +#include +#include "DimInfo.h" +#include "Utils.h" + +namespace mediavision +{ +namespace inference +{ + class OffsetVec + { + private: + std::string name; + DimInfo dimInfo; + int shapeType; + public: + OffsetVec() : name(), dimInfo(), shapeType() { } + ~OffsetVec() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + int GetShapeType() { return shapeType; } + + int ParseOffset(JsonObject *root, const std::map& supportedShapeType) + { + JsonArray * rootArray = json_object_get_array_member(root, "offset"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + try { + shapeType = GetSupportedType(pObject, "shape_type", supportedShapeType); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index d223726..6724526 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -27,6 +27,14 @@ #include #include #include "OutputMetadataTypes.h" +#include "DecodeInfo.h" +#include "Edge.h" +#include "DispVec.h" +#include "DimInfo.h" +#include "OffsetVec.h" +#include "Landmark.h" +#include "BoxInfo.h" +#include 
"ScoreInfo.h" /** * @file OutputMetadata.h @@ -38,316 +46,16 @@ namespace mediavision { namespace inference { - class DimInfo + struct Label { - private: - std::vector dims; - - public: - std::vector GetValidIndexAll() const; - void SetValidIndex(int index); - }; - - class DeQuantization - { - private: - double scale; - double zeropoint; - - public: - DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; - ~DeQuantization() = default; - - double GetScale() { return scale; } - double GetZeroPoint() { return zeropoint; } - }; - - class ScoreInfo - { - private: - std::string name; - DimInfo dimInfo; - double threshold; - int topNumber; - inference_score_type_e type; - std::shared_ptr deQuantization; - std::map supportedScoreTypes; - - public: - ScoreInfo(); - ~ScoreInfo() = default; - - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - double GetThresHold() { return threshold; } - inference_score_type_e GetType() { return type; } - int GetTopNumber() { return topNumber; } - std::shared_ptr GetDeQuant() { return deQuantization; } - int ParseScore(JsonObject *root); - }; - - struct AnchorParam { - int mode; /**< 0: generate anchor, 1:load pre-anchor*/ - int numLayers; - float minScale; - float maxScale; - int inputSizeHeight; - int inputSizeWidth; - float anchorOffsetX; - float anchorOffsetY; - std::vector strides; - std::vector aspectRatios; - bool isReduceBoxedInLowestLayer; - float interpolatedScaleAspectRatio; - bool isFixedAnchorSize; - bool isExponentialBoxScale; - float xScale; - float yScale; - float wScale; - float hScale; - }; - - struct NMSParam { - inference_box_nms_type_e mode; /**< 0: standard */ - float iouThreshold; - std::map supportedBoxNmsTypes; - }; - - struct RotateParam { - int startPointIndex; - int endPointIndex; - cv::Point2f startPoint; - cv::Point2f endPoint; - float baseAngle; - }; - - struct RoiOptionParam { - int startPointIndex; - int endPointIndex; - int centerPointIndex; - cv::Point2f centerPoint; - float shiftX; - float shiftY; - float scaleX; - float scaleY; - int mode; - }; - - class DecodeInfo { - private: - AnchorParam anchorParam; - std::vector anchorBoxes; - NMSParam nmsParam; - RotateParam rotParam; - RoiOptionParam roiOptParam; - - public: - DecodeInfo() { - nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; - nmsParam.iouThreshold = 0.2f; - nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); - - rotParam.startPointIndex = -1; - rotParam.endPointIndex = -1; - rotParam.startPoint = cv::Point2f(0.f,0.f); - rotParam.endPoint = cv::Point2f(0.f,0.f); - rotParam.baseAngle = 0.f; - - roiOptParam.startPointIndex = -1; - roiOptParam.endPointIndex = -1; - roiOptParam.centerPointIndex = -1; - roiOptParam.centerPoint = cv::Point2f(0.f, 0.f); - roiOptParam.shiftX = 0.f; - roiOptParam.shiftY = 0.f; - roiOptParam.scaleX = 1.f; - roiOptParam.scaleY = 1.f; - roiOptParam.mode = -1; - } - - ~DecodeInfo() = default; - - std::vector& GetAnchorBoxAll(); - bool IsAnchorBoxEmpty(); - void AddAnchorBox(cv::Rect2f& ahcnor); - void ClearAnchorBox(); - - // Anchor param - int ParseAnchorParam(JsonObject *root); - int GenerateAnchor(); - bool IsFixedAnchorSize(); - bool IsExponentialBoxScale(); - float GetAnchorXscale(); - float GetAnchorYscale(); - float GetAnchorWscale(); - float GetAnchorHscale(); - float CalculateScale(float min, float max, int index, int maxStride); - - // Nms param - int ParseNms(JsonObject *root); - int GetNmsMode(); - float GetNmsIouThreshold(); - - // Rotate param - int 
ParseRotate(JsonObject *root); - int GetRotStartPointIndex(); - int GetRotEndPointIndex(); - float GetBaseAngle(); - - // Roi option param - int ParseRoiOption(JsonObject *root); - int GetRoiMode(); - int GetRoiCenterPointIndex(); - int GetRoiStartPointIndex(); - int GetRoiEndPointIndex(); - float GetShiftX(); - float GetShiftY(); - float GetScaleX(); - float GetScaleY(); - }; - - class BoxInfo - { - private: - std::string name; - DimInfo dimInfo; - inference_box_type_e type; // 0:L-T-R-B, 1: Cx-Cy-W-H - std::vector order; // Order based on box type - inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel - inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor - DecodeInfo decodingInfo; - - std::map supportedBoxTypes; - std::map supportedBoxCoordinateTypes; - std::map supportedBoxDecodingTypes; - - public: - BoxInfo(); - ~BoxInfo() = default; - - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - inference_box_type_e GetType() { return type; } - inference_box_decoding_type_e GetDecodingType() { return decodingType; } - std::vector GetOrder() { return order; } - int GetCoordinate() { return coordinate; } - DecodeInfo& GetDecodeInfo() {return decodingInfo; } - - int ParseBox(JsonObject *root); - }; - - class Label - { - private: - std::string name; - DimInfo dimInfo; - - public: - Label() = default; - ~Label() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - - int ParseLabel(JsonObject *root); - }; - - class Number - { - private: std::string name; DimInfo dimInfo; - - public: - Number() = default; - ~Number() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - - int ParseNumber(JsonObject *root); }; - struct HeatMapInfo { - int wIdx; - int hIdx; - int cIdx; - float nmsRadius; - inference_tensor_shape_type_e shapeType; - }; - - class Landmark + struct Number { - private: std::string name; DimInfo dimInfo; - inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */ - int offset; - inference_landmark_coorindate_type_e coordinate; /**< 0: RATIO, 1: PIXEL */ - inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, - 1: decoding heatmap, - 2: decoding heatmap with refinement */ - HeatMapInfo heatMapInfo; - - std::map supportedLandmarkTypes; - std::map supportedLandmarkCoordinateTypes; - std::map supportedLandmarkDecodingTypes; - - public: - Landmark(); - ~Landmark() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - inference_landmark_type_e GetType(); - int GetOffset(); - inference_landmark_coorindate_type_e GetCoordinate(); - inference_landmark_decoding_type_e GetDecodingType(); - HeatMapInfo& GetHeatMapInfo(); - - int ParseLandmark(JsonObject *root); - }; - - class OffsetVec - { - private: - std::string name; - DimInfo dimInfo; - int shapeType; - public: - OffsetVec() = default; - ~OffsetVec() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - int GetShapeType() { return shapeType; } - - int ParseOffset(JsonObject *root); - }; - - class DispVec - { - private: - std::string name; - DimInfo dimInfo; - inference_displacement_type_e type; - int shapeType; - std::map supportedDispTypes; - public: - DispVec(); - ~DispVec() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - inference_displacement_type_e GetType() { return type; } - int GetShapeType() { return 
shapeType; } - - int ParseDisplacement(JsonObject *root); - }; - - class Edge - { - private: - std::vector> edges; - public: - Edge() = default; - ~Edge() = default; - int ParseEdge(JsonObject *root); - std::vector>& GetEdgesAll() { return edges; } }; class OutputMetadata @@ -355,13 +63,14 @@ namespace inference private: bool parsed; ScoreInfo score; - BoxInfo box; + box::BoxInfo box; Label label; Number number; Landmark landmark; OffsetVec offsetVec; std::vector dispVecs; Edge edgeMap; + std::map mSupportedShapeType; int ParseScore(JsonObject *root); int ParseBox(JsonObject *root); @@ -375,7 +84,6 @@ namespace inference int ParseEdgeMap(JsonObject * root); public: - static std::map supportedTensorShapes; /** * @brief Creates an OutputMetadata class instance. * @@ -405,15 +113,17 @@ namespace inference double GetScoreThreshold() { return score.GetThresHold(); } int GetScoreTopNumber() { return score.GetTopNumber(); } std::shared_ptr GetScoreDeQuant() { return score.GetDeQuant(); } + double GetScoreDeQuantScale() { return score.GetDeQuantScale(); } + double GetScoreDeQuantZeroPoint() { return score.GetDeQuantZeroPoint(); } std::string GetBoxName() { return box.GetName(); } DimInfo GetBoxDimInfo() { return box.GetDimInfo(); } std::vector GetBoxOrder() { return box.GetOrder(); } - DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } + box::DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } inference_box_type_e GetBoxType() { return box.GetType(); } int GetScoreCoordinate() { return box.GetCoordinate(); } - std::string GetLabelName() { return label.GetName(); } - std::string GetNumberName() { return number.GetName(); } - DimInfo GetNumberDimInfo() { return number.GetDimInfo(); } + std::string GetLabelName() { return label.name; } + std::string GetNumberName() { return number.name; } + DimInfo GetNumberDimInfo() { return number.dimInfo; } std::string GetLandmarkName() { return landmark.GetName(); } int GetLandmarkOffset() { return landmark.GetOffset(); } inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } @@ -425,9 +135,6 @@ namespace inference inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } std::vector& GetDispVecAll() { return dispVecs; } std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } - template - static T GetSupportedType(JsonObject* root, std::string typeName, - std::map& supportedTypes); }; } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h new file mode 100644 index 0000000..24180d7 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h @@ -0,0 +1,129 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SCORE_INFO_H__ +#define __SCORE_INFO_H__ + +#include +#include +#include +#include + +#include +#include "DimInfo.h" + +namespace mediavision +{ +namespace inference +{ + struct DeQuantization + { + double scale; + double zeropoint; + + DeQuantization(double s, double z) : scale(s), zeropoint(z) { } + }; + + class ScoreInfo + { + private: + std::string name; + DimInfo dimInfo; + double threshold; + int topNumber; + inference_score_type_e type; + std::shared_ptr deQuantization; + std::map supportedScoreTypes; + + public: + ScoreInfo() : + name(), + dimInfo(), + threshold(0.0), + topNumber(1), + type(INFERENCE_SCORE_TYPE_NORMAL), + deQuantization(nullptr) + { + // Score type + supportedScoreTypes.insert({"NORMAL", INFERENCE_SCORE_TYPE_NORMAL}); + supportedScoreTypes.insert({"SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID}); + } + + ~ScoreInfo() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + double GetThresHold() { return threshold; } + inference_score_type_e GetType() { return type; } + int GetTopNumber() { return topNumber; } + std::shared_ptr GetDeQuant() { return deQuantization; } + double GetDeQuantScale() { return deQuantization->scale; } + double GetDeQuantZeroPoint() { return deQuantization->zeropoint; } + + int ParseScore(JsonObject *root) + { + LOGI("ENTER"); + + JsonArray * rootArray = json_object_get_array_member(root, "score"); + unsigned int elements = json_array_get_length(rootArray); + + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); + LOGI("top number: %d", topNumber); + + threshold = static_cast(json_object_get_double_member(pObject, "threshold")); + LOGI("threshold: %1.3f", threshold); + + try { + type = GetSupportedType(pObject, "score_type", supportedScoreTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + if (json_object_has_member(pObject, "dequantization")) { + array = json_object_get_array_member(pObject, "dequantization"); + JsonNode *node = json_array_get_element(array, 0); + JsonObject *object = json_node_get_object(node); + + deQuantization = std::make_shared( + json_object_get_double_member(object, "scale"), + json_object_get_double_member(object, "zeropoint")); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/Utils.h b/mv_machine_learning/mv_inference/inference/include/Utils.h new file mode 100644 index 0000000..c8a37cd --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Utils.h @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTILS_H__
+#define __UTILS_H__
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace mediavision
+{
+namespace inference
+{
+	template <typename T>
+	T GetSupportedType(JsonObject* root, std::string typeName, const std::map<std::string, T>& supportedTypes)
+	{
+		auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str()));
+		if (supportedType == supportedTypes.end()) {
+			throw std::invalid_argument(typeName);
+		}
+
+		LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str());
+
+		return supportedType->second;
+	}
+} /* Inference */
+} /* MediaVision */
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
index d79b3ff..bf9a102 100755
--- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
@@ -1130,8 +1130,8 @@ namespace inference
 			if (outputMetadata.GetScoreDeQuant()) {
 				value = PostProcess::dequant(value,
-						outputMetadata.GetScoreDeQuant()->GetScale(),
-						outputMetadata.GetScoreDeQuant()->GetZeroPoint());
+						outputMetadata.GetScoreDeQuantScale(),
+						outputMetadata.GetScoreDeQuantZeroPoint());
 			}
 
 			if (outputMetadata.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID)
diff --git a/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
index 66d257c..d6bc290 100644
--- a/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
@@ -23,6 +23,7 @@
 #include
 #include "InputMetadata.h"
 #include
+#include "Utils.h"
 
 namespace mediavision
 {
@@ -46,20 +47,6 @@ namespace inference
 		mSupportedColorSpace.insert({"GRAY8", MEDIA_VISION_COLORSPACE_Y800});
 	}
 
-	template <typename T>
-	T InputMetadata::GetSupportedType(JsonObject* root, std::string typeName,
-						std::map<std::string, T>& supportedTypes)
-	{
-		auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str()));
-		if (supportedType == supportedTypes.end()) {
-			throw std::invalid_argument(typeName);
-		}
-
-		LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str());
-
-		return supportedType->second;
-	}
-
 	int InputMetadata::GetTensorInfo(JsonObject *root)
 	{
 		LOGI("ENTER");
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
index 176b0eb..8a1362a 100755
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -21,15 +21,16 @@
 #include
 #include
 #include
+
 #include "OutputMetadata.h"
+#include "Utils.h"
+
+using namespace mediavision::inference::box;
 
 namespace mediavision
 {
 namespace inference
 {
-	std::map<std::string, inference_tensor_shape_type_e> OutputMetadata::supportedTensorShapes =
-		{{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}};
-
 	OutputMetadata::OutputMetadata() :
 			parsed(false),
 			score(),
@@ -41,84 +42,9 @@ namespace
inference dispVecs(), edgeMap() { - - } - - ScoreInfo::ScoreInfo() : - name(), - dimInfo(), - threshold(0.0), - topNumber(1), - type(INFERENCE_SCORE_TYPE_NORMAL), - deQuantization(nullptr) - { - // Score type - supportedScoreTypes.insert({"NORMAL", INFERENCE_SCORE_TYPE_NORMAL}); - supportedScoreTypes.insert({"SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID}); - } - - template - T OutputMetadata::GetSupportedType(JsonObject* root, std::string typeName, - std::map& supportedTypes) - { - auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str())); - if (supportedType == supportedTypes.end()) { - throw std::invalid_argument(typeName); - } - - LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str()); - - return supportedType->second; - } - - int ScoreInfo::ParseScore(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "score"); - unsigned int elements = json_array_get_length(rootArray); - - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); - LOGI("top number: %d", topNumber); - - threshold = static_cast(json_object_get_double_member(pObject, "threshold")); - LOGI("threshold: %1.3f", threshold); - - try { - type = OutputMetadata::GetSupportedType(pObject, "score_type", supportedScoreTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - if (json_object_has_member(pObject, "dequantization")) { - array = json_object_get_array_member(pObject, "dequantization"); - JsonNode *node = json_array_get_element(array, 0); - JsonObject *object = json_node_get_object(node); - - deQuantization = std::make_shared( - json_object_get_double_member(object, "scale"), - json_object_get_double_member(object, "zeropoint")); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; + // shape_type + mSupportedShapeType.insert({"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}); + mSupportedShapeType.insert({"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}); } int OutputMetadata::ParseScore(JsonObject *root) @@ -131,71 +57,6 @@ namespace inference return score.ParseScore(root); } - BoxInfo::BoxInfo() : - name(), - dimInfo(), - type(INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP), - order(), - coordinate(INFERENCE_BOX_COORDINATE_TYPE_RATIO), - decodingType(INFERENCE_BOX_DECODING_TYPE_BYPASS), - decodingInfo() - - { - supportedBoxTypes.insert({"ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP}); - supportedBoxTypes.insert({"ORIGIN_CENTER", INFERENCE_BOX_TYPE_ORIGIN_CENTER}); - - supportedBoxCoordinateTypes.insert({"RATIO", INFERENCE_BOX_COORDINATE_TYPE_RATIO}); - supportedBoxCoordinateTypes.insert({"PIXEL", INFERENCE_BOX_COORDINATE_TYPE_PIXEL}); - - supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS}); - supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR}); - } - - int 
BoxInfo::ParseBox(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "box"); - unsigned int elements = json_array_get_length(rootArray); - - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - try { - type = OutputMetadata::GetSupportedType(pObject, "box_type", supportedBoxTypes); - coordinate = OutputMetadata::GetSupportedType(pObject, "box_coordinate", supportedBoxCoordinateTypes); - decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedBoxDecodingTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - array = json_object_get_array_member(pObject, "box_order"); - elements2 = json_array_get_length(array); - LOGI("box order should have 4 elements and it has [%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - auto val = static_cast(json_array_get_int_element(array, elem2)); - order.push_back(val); - LOGI("%d", val); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - void DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) { anchorBoxes.push_back(anchor); @@ -226,10 +87,16 @@ namespace inference return box.ParseBox(root); } - int Label::ParseLabel(JsonObject *root) + int OutputMetadata::ParseLabel(JsonObject *root) { LOGI("ENTER"); + if (!json_object_has_member(root, "label")) { + LOGE("No box outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + JsonArray * rootArray = json_object_get_array_member(root, "label"); unsigned int elements = json_array_get_length(rootArray); @@ -238,40 +105,32 @@ namespace inference JsonNode *pNode = json_array_get_element(rootArray, elem); JsonObject *pObject = json_node_get_object(pNode); - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); + label.name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", label.name.c_str()); JsonArray * array = json_object_get_array_member(pObject, "index"); unsigned int elements2 = json_array_get_length(array); LOGI("range dim: size[%u]", elements2); for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); + label.dimInfo.SetValidIndex(elem2); } } - LOGI("LEAVEL"); + LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } - int OutputMetadata::ParseLabel(JsonObject *root) + int OutputMetadata::ParseNumber(JsonObject *root) { LOGI("ENTER"); - if (!json_object_has_member(root, "label")) { - LOGE("No box outputmetadata"); + if (!json_object_has_member(root, "number")) { + LOGE("No number outputmetadata"); LOGI("LEAVE"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - label.ParseLabel(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int Number::ParseNumber(JsonObject *root) - { // box JsonArray * rootArray = json_object_get_array_member(root, "number"); unsigned int elements = 
json_array_get_length(rootArray); @@ -281,31 +140,19 @@ namespace inference JsonNode *pNode = json_array_get_element(rootArray, elem); JsonObject *pObject = json_node_get_object(pNode); - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); + number.name = json_object_get_string_member(pObject,"name"); + + LOGI("layer: %s", number.name.c_str()); JsonArray * array = json_object_get_array_member(pObject, "index"); unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - } - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseNumber(JsonObject *root) - { - LOGI("ENTER"); + LOGI("range dim: size[%u]", elements2); - if (!json_object_has_member(root, "number")) { - LOGE("No number outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + number.dimInfo.SetValidIndex(elem2); } - number.ParseNumber(root); LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; @@ -560,7 +407,7 @@ namespace inference JsonObject *object = json_object_get_object_member(root, "nms"); try { - this->nmsParam.mode = OutputMetadata::GetSupportedType(object, "mode", this->nmsParam.supportedBoxNmsTypes); + this->nmsParam.mode = GetSupportedType(object, "mode", this->nmsParam.supportedBoxNmsTypes); } catch (const std::exception& e) { LOGE("Invalid %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -671,94 +518,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - Landmark::Landmark() : - name(), - dimInfo(), - type(INFERENCE_LANDMARK_TYPE_2D_SINGLE), - offset(), - coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), - decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), - heatMapInfo() - - { - supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); - supportedLandmarkTypes.insert({"2D_MULTI", INFERENCE_LANDMARK_TYPE_2D_MULTI}); - supportedLandmarkTypes.insert({"3D_SINGLE", INFERENCE_LANDMARK_TYPE_3D_SINGLE}); - - supportedLandmarkCoordinateTypes.insert({"RATIO", INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO}); - supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); - - supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); - supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); - supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); - } - - int Landmark::ParseLandmark(JsonObject *root) - { - // box - JsonArray * rootArray = json_object_get_array_member(root, "landmark"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = - static_cast(json_object_get_string_member(pObject,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - 
dimInfo.SetValidIndex(elem2); - } - - try { - type = OutputMetadata::GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes); - coordinate = OutputMetadata::GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes); - decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); - LOGI("landmark offset: %d", offset); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - inference_landmark_type_e Landmark::GetType() - { - return type; - } - - int Landmark::GetOffset() - { - return offset; - } - - inference_landmark_coorindate_type_e Landmark::GetCoordinate() - { - return coordinate; - } - - inference_landmark_decoding_type_e Landmark::GetDecodingType() - { - return decodingType; - } - - HeatMapInfo& Landmark::GetHeatMapInfo() - { - return heatMapInfo; - } - int OutputMetadata::ParseLandmark(JsonObject *root) { LOGI("ENTER"); @@ -812,7 +571,7 @@ namespace inference JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; try { - landmark.GetHeatMapInfo().shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); + landmark.GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", mSupportedShapeType); } catch (const std::exception& e) { LOGE("Invalid %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -839,42 +598,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int OffsetVec::ParseOffset(JsonObject *root) - { - JsonArray * rootArray = json_object_get_array_member(root, "offset"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = - static_cast(json_object_get_string_member(pObject,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - try { - shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - int OutputMetadata::ParseOffset(JsonObject *root) { LOGI("ENTER"); @@ -885,44 +608,7 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_OPERATION; } - offsetVec.ParseOffset(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - DispVec::DispVec() : - name(), - dimInfo(), - type(INFERENCE_DISPLACEMENT_TYPE_FORWARD), - shapeType(INFERENCE_TENSOR_SHAPE_NCHW) - { - supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD}); - supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD}); - } - - int DispVec::ParseDisplacement(JsonObject *root) - { - LOGI("ENTER"); - name = - static_cast(json_object_get_string_member(root,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = 
json_object_get_array_member(root, "index");
-		unsigned int elements2 = json_array_get_length(array);
-		LOGI("range dim: size[%u]", elements2);
-		for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-			if(static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
-				dimInfo.SetValidIndex(elem2);
-		}
-
-		try {
-			shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes);
-			type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes);
-		} catch (const std::exception& e) {
-			LOGE("Invalid %s", e.what());
-			return MEDIA_VISION_ERROR_INVALID_OPERATION;
-		}
+		offsetVec.ParseOffset(root, mSupportedShapeType);
 
 		LOGI("LEAVE");
 		return MEDIA_VISION_ERROR_NONE;
@@ -946,32 +632,10 @@ namespace inference
 		for (auto& disp : dispVecs) {
 			JsonNode *pNode = json_array_get_element(rootArray, elem++);
 			JsonObject *pObject = json_node_get_object(pNode);
-			disp.ParseDisplacement(pObject);
-		}
-
-		LOGI("LEAVE");
-		return MEDIA_VISION_ERROR_NONE;
-	}
-
-	int Edge::ParseEdge(JsonObject *root)
-	{
-		LOGI("ENTER");
-		JsonArray * rootArray = json_object_get_array_member(root, "edgemap");
-		unsigned int elements = json_array_get_length(rootArray);
-
-		// TODO: handling error
-		int pEdgeNode, cEdgeNode;
-		for (unsigned int elem = 0; elem < elements; ++elem) {
-
-			JsonNode *pNode = json_array_get_element(rootArray, elem);
-			JsonObject *pObject = json_node_get_object(pNode);
-			pEdgeNode = json_object_get_int_member(pObject, "parent");
-			cEdgeNode = json_object_get_int_member(pObject, "child");
-
-			edges.push_back(std::make_pair(pEdgeNode, cEdgeNode));
-			LOGI("%ud: parent - child: %d - %d", elem, pEdgeNode, cEdgeNode);
+			disp.ParseDisplacement(pObject, mSupportedShapeType);
 		}
+
 		LOGI("LEAVE");
 		return MEDIA_VISION_ERROR_NONE;
 	}
@@ -1084,22 +748,5 @@ namespace inference
 		return MEDIA_VISION_ERROR_NONE;
 	}
-
-	void DimInfo::SetValidIndex(int index)
-	{
-		LOGI("ENTER");
-
-		dims.push_back(index);
-
-		LOGI("LEAVE");
-	}
-
-	std::vector<int> DimInfo::GetValidIndexAll() const
-	{
-		LOGI("ENTER");
-
-		LOGI("LEAVE");
-		return dims;
-	}
 } /* Inference */
 } /* MediaVision */
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index 205f377..789842c 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.8.18
+Version:     0.8.19
 Release:     1
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
-- 
2.7.4


From 1667c0828c5e104db8a7d0523cb8593573f5c8ac Mon Sep 17 00:00:00 2001
From: Inki Dae
Date: Mon, 18 Oct 2021 16:06:27 +0900
Subject: [PATCH 06/16] mv_machine_learning: code refactoring to property
 parsing

Refactored the property parsing of the BoxInfo and Landmark classes.

What this patch does:
- Move the ParseLabel, ParseNumber and ParseDecodeInfo functions from
  the OutputMetadata class to the BoxInfo class, because these
  properties belong to BoxInfo, not OutputMetadata.
- Move the ParseDisplacement, ParseEdgeMap and ParseDecodeInfo
  functions from the OutputMetadata class to the Landmark class,
  because these properties belong to Landmark, not OutputMetadata.

This is just one step toward the next round of code refactoring.
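For illustration, the intended call flow after this change looks roughly
like the sketch below. Not all of the dispatch points inside
OutputMetadata are visible in this patch, so the function body shown is
a simplified assumption for illustration only, not the literal patch
content:

    // Hypothetical sketch: OutputMetadata stays a thin dispatcher while
    // the class that owns the data does the JSON parsing itself.
    int OutputMetadata::ParseBox(JsonObject *root)
    {
        // BoxInfo now parses its own "label" and "number" properties
        // in addition to the "box" property itself.
        int ret = box.ParseBox(root);
        if (ret != MEDIA_VISION_ERROR_NONE)
            return ret;

        // Whether a missing "label" is fatal is up to the caller; the
        // moved ParseLabel() reports it as MEDIA_VISION_ERROR_INVALID_OPERATION.
        if (box.ParseLabel(root) != MEDIA_VISION_ERROR_NONE)
            LOGI("No label property. Skipping it");

        return box.ParseNumber(root);
    }

Keeping each Parse*() next to the members it fills means OutputMetadata
no longer needs to know the JSON layout of box- or landmark-specific
properties.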
Change-Id: I0a17ee29f492bee53acacab339ba746a44495790 Signed-off-by: Inki Dae --- .../mv_inference/inference/include/BoxInfo.h | 150 ++++++++++++ .../mv_inference/inference/include/DecodeInfo.h | 1 + .../mv_inference/inference/include/Landmark.h | 113 +++++++++ .../inference/include/OutputMetadata.h | 33 +-- .../mv_inference/inference/include/ScoreInfo.h | 1 + .../mv_inference/inference/src/Inference.cpp | 10 +- .../mv_inference/inference/src/ObjectDecoder.cpp | 14 +- .../mv_inference/inference/src/OutputMetadata.cpp | 259 +-------------------- .../mv_inference/inference/src/PoseDecoder.cpp | 20 +- 9 files changed, 300 insertions(+), 301 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/BoxInfo.h b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h index ceffaa0..63bc28d 100644 --- a/mv_machine_learning/mv_inference/inference/include/BoxInfo.h +++ b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h @@ -22,15 +22,32 @@ #include #include +#include #include #include +#include "DecodeInfo.h" +#include "DimInfo.h" +#include "Utils.h" + namespace mediavision { namespace inference { namespace box { + struct Label + { + std::string name; + DimInfo dimInfo; + }; + + struct Number + { + std::string name; + DimInfo dimInfo; + }; + class BoxInfo { private: @@ -41,6 +58,8 @@ namespace box inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor DecodeInfo decodingInfo; + Label label; + Number number; std::map supportedBoxTypes; std::map supportedBoxCoordinateTypes; @@ -76,6 +95,9 @@ namespace box std::vector GetOrder() { return order; } int GetCoordinate() { return coordinate; } DecodeInfo& GetDecodeInfo() {return decodingInfo; } + std::string GetLabelName() { return label.name; } + std::string GetNumberName() { return number.name; } + DimInfo GetNumberDimInfo() { return number.dimInfo; } int ParseBox(JsonObject *root) { @@ -123,6 +145,134 @@ namespace box LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } + + int ParseLabel(JsonObject *root) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "label")) { + LOGE("No box outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonArray * rootArray = json_object_get_array_member(root, "label"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + label.name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", label.name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + label.dimInfo.SetValidIndex(elem2); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseNumber(JsonObject *root) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "number")) { + LOGE("No number outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // box + JsonArray * rootArray = json_object_get_array_member(root, "number"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + 
JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + number.name = json_object_get_string_member(pObject,"name"); + + LOGI("layer: %s", number.name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + + LOGI("range dim: size[%u]", elements2); + + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + number.dimInfo.SetValidIndex(elem2); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseDecodeInfo(JsonObject *root) + { + LOGI("ENTER"); + + // box + JsonArray * rootArray = json_object_get_array_member(root, "box"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + if (!json_object_has_member(pObject, "decoding_info")) { + LOGE("decoding_info is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); + if (!json_object_has_member(cObject, "anchor")) { + LOGE("anchor is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + int ret = GetDecodeInfo().ParseAnchorParam(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseAnchorParam[%d]", ret); + return ret; + } + + ret = GetDecodeInfo().ParseNms(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseNms[%d]", ret); + return ret; + } + + ret = GetDecodeInfo().ParseRotate(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseRotate[%d]", ret); + return ret; + } + + ret = GetDecodeInfo().ParseRoiOption(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseRoiOption[%d]", ret); + return ret; + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } }; } /* box */ } /* Inference */ diff --git a/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h index a872c3a..7cdbca4 100644 --- a/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h +++ b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h @@ -24,6 +24,7 @@ #include #include +#include "Utils.h" namespace mediavision { diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index 2fe6c9a..26dda7f 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -20,7 +20,12 @@ #include #include #include + #include +#include +#include "DimInfo.h" +#include "DispVec.h" +#include "Utils.h" /** * @file Landmark.h @@ -67,6 +72,8 @@ namespace inference 1: decoding heatmap, 2: decoding heatmap with refinement */ HeatMapInfo heatMapInfo; + std::vector dispVecs; + Edge edgeMap; std::map supportedLandmarkTypes; std::map supportedLandmarkCoordinateTypes; @@ -139,6 +146,108 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } + int ParseDisplacement(JsonObject *root, + const std::map& supportedShapeType) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "displacement")) { + LOGI("No displacement outputmetadata"); + LOGI("LEAVE"); + return 
MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonArray * rootArray = json_object_get_array_member(root, "displacement"); + unsigned int elements = json_array_get_length(rootArray); + + dispVecs.resize(elements); + unsigned int elem = 0; + for (auto& disp : dispVecs) { + JsonNode *pNode = json_array_get_element(rootArray, elem++); + JsonObject *pObject = json_node_get_object(pNode); + + disp.ParseDisplacement(pObject, supportedShapeType); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseEdgeMap(JsonObject * root) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "edgemap")) { + LOGI("No edgemap outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + edgeMap.ParseEdge(root); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseDecodeInfo(JsonObject *root, + const std::map& supportedShapeType) + { + LOGI("ENTER"); + + // box + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + if (!json_object_has_member(pObject, "decoding_info")) { + LOGE("decoding_info is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); + if (!json_object_has_member(cObject, "heatmap")) { + LOGE("heatmap is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; + try { + GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + std::vector heatMapIndexes = GetDimInfo().GetValidIndexAll(); + if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + GetHeatMapInfo().cIdx = heatMapIndexes[0]; + GetHeatMapInfo().hIdx = heatMapIndexes[1]; + GetHeatMapInfo().wIdx = heatMapIndexes[2]; + } else { + GetHeatMapInfo().hIdx = heatMapIndexes[0]; + GetHeatMapInfo().wIdx = heatMapIndexes[1]; + GetHeatMapInfo().cIdx = heatMapIndexes[2]; + } + + if (json_object_has_member(object, "nms_radius")) { + GetHeatMapInfo().nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); + LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius ); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + inference_landmark_type_e GetType() { return type; @@ -167,6 +276,10 @@ namespace inference std::string GetName() { return name; } DimInfo GetDimInfo() { return dimInfo; } + + std::vector& GetDispVecAll() { return dispVecs; } + + std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } }; } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 6724526..9385aa7 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -46,42 +46,20 @@ namespace mediavision { namespace inference { - struct Label - { - std::string name; - DimInfo dimInfo; - }; - - struct Number - { - std::string name; - DimInfo dimInfo; - }; - 
class OutputMetadata { private: bool parsed; ScoreInfo score; box::BoxInfo box; - Label label; - Number number; Landmark landmark; OffsetVec offsetVec; - std::vector dispVecs; - Edge edgeMap; std::map mSupportedShapeType; int ParseScore(JsonObject *root); int ParseBox(JsonObject *root); - int ParseLabel(JsonObject *root); - int ParseNumber(JsonObject *root); - int ParseBoxDecodeInfo(JsonObject *root); int ParseLandmark(JsonObject *root); - int ParseLandmarkDecodeInfo(JsonObject *root); int ParseOffset(JsonObject *root); - int ParseDisplacement(JsonObject *root); - int ParseEdgeMap(JsonObject * root); public: /** @@ -120,10 +98,11 @@ namespace inference std::vector GetBoxOrder() { return box.GetOrder(); } box::DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } inference_box_type_e GetBoxType() { return box.GetType(); } + std::string GetBoxLabelName() { return box.GetLabelName(); } + std::string GetBoxNumberName() { return box.GetNumberName(); } + DimInfo GetBoxNumberDimInfo() { return box.GetNumberDimInfo(); } + int GetScoreCoordinate() { return box.GetCoordinate(); } - std::string GetLabelName() { return label.name; } - std::string GetNumberName() { return number.name; } - DimInfo GetNumberDimInfo() { return number.dimInfo; } std::string GetLandmarkName() { return landmark.GetName(); } int GetLandmarkOffset() { return landmark.GetOffset(); } inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } @@ -131,10 +110,10 @@ namespace inference HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); } inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); } inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); } + std::vector& GetLandmarkDispVecAll() { return landmark.GetDispVecAll(); } + std::vector>& GetLandmarkEdges() { return landmark.GetEdges(); } std::string GetOffsetVecName() { return offsetVec.GetName(); } inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } - std::vector& GetDispVecAll() { return dispVecs; } - std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } }; } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h index 24180d7..e3e3393 100644 --- a/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h +++ b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h @@ -24,6 +24,7 @@ #include #include "DimInfo.h" +#include "Utils.h" namespace mediavision { diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index bf9a102..0308e49 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -451,11 +451,11 @@ namespace inference if (!outputMeta.GetBoxName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName()); - if (!outputMeta.GetLabelName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLabelName()); + if (!outputMeta.GetBoxLabelName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxLabelName()); - if (!outputMeta.GetNumberName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetNumberName()); + if (!outputMeta.GetBoxNumberName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxNumberName()); if (!outputMeta.GetLandmarkName().empty()) 
mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName()); @@ -463,7 +463,7 @@ namespace inference if (!outputMeta.GetOffsetVecName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName()); - for (auto& dispVec : outputMeta.GetDispVecAll()) { + for (auto& dispVec : outputMeta.GetLandmarkDispVecAll()) { mConfig.mOutputLayerNames.push_back(dispVec.GetName()); } } diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp index 4d5e36e..e631ff0 100755 --- a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp @@ -28,15 +28,15 @@ namespace inference int ObjectDecoder::init() { if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - if (!mTensorBuffer.exist(mMeta.GetLabelName()) || - !mTensorBuffer.exist(mMeta.GetNumberName()) ) { + if (!mTensorBuffer.exist(mMeta.GetBoxLabelName()) || + !mTensorBuffer.exist(mMeta.GetBoxNumberName()) ) { LOGE("buffer buffers named of %s or %s are NULL", - mMeta.GetLabelName().c_str(), mMeta.GetNumberName().c_str()); + mMeta.GetBoxLabelName().c_str(), mMeta.GetBoxNumberName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - std::vector indexes = mMeta.GetNumberDimInfo().GetValidIndexAll(); + std::vector indexes = mMeta.GetBoxNumberDimInfo().GetValidIndexAll(); if (indexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -45,7 +45,7 @@ namespace inference // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS. // Otherwise it is set already within ctor. mNumberOfOjects = mTensorBuffer.getValue( - mMeta.GetNumberName(), indexes[0]); + mMeta.GetBoxNumberName(), indexes[0]); } else { if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) { LOGE("Anchor boxes are required but empty."); @@ -101,9 +101,9 @@ namespace inference } Box box = { - .index = mMeta.GetLabelName().empty() ? + .index = mMeta.GetBoxLabelName().empty() ? 
label : - mTensorBuffer.getValue(mMeta.GetLabelName(), idx), + mTensorBuffer.getValue(mMeta.GetBoxLabelName(), idx), .score = score, .location = cv::Rect2f(cx, cy, cWidth, cHeight) }; diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 8a1362a..738116f 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -35,12 +35,8 @@ namespace inference parsed(false), score(), box(), - label(), - number(), landmark(), - offsetVec(), - dispVecs(), - edgeMap() + offsetVec() { // shape_type mSupportedShapeType.insert({"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}); @@ -87,141 +83,6 @@ namespace inference return box.ParseBox(root); } - int OutputMetadata::ParseLabel(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "label")) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonArray * rootArray = json_object_get_array_member(root, "label"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - label.name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", label.name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - label.dimInfo.SetValidIndex(elem2); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseNumber(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "number")) { - LOGE("No number outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "number"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - number.name = json_object_get_string_member(pObject,"name"); - - LOGI("layer: %s", number.name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - - LOGI("range dim: size[%u]", elements2); - - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - number.dimInfo.SetValidIndex(elem2); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseBoxDecodeInfo(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "box")) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "box"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - if 
(!json_object_has_member(pObject, "decoding_info")) { - LOGE("decoding_info is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); - if (!json_object_has_member(cObject, "anchor")) { - LOGE("anchor is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - int ret = box.GetDecodeInfo().ParseAnchorParam(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseAnchorParam[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().ParseNms(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseNms[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().ParseRotate(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseRotate[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().ParseRoiOption(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseRoiOption[%d]", ret); - return ret; - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - - } - int DecodeInfo::ParseAnchorParam(JsonObject *root) { JsonObject *object = json_object_get_object_member(root, "anchor") ; @@ -534,70 +395,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int OutputMetadata::ParseLandmarkDecodeInfo(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "landmark")) { - LOGI("No landmark outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "landmark"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - if (!json_object_has_member(pObject, "decoding_info")) { - LOGE("decoding_info is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); - if (!json_object_has_member(cObject, "heatmap")) { - LOGE("heatmap is mandatory. 
Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; - try { - landmark.GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", mSupportedShapeType); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - std::vector heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll(); - if (landmark.GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - landmark.GetHeatMapInfo().cIdx = heatMapIndexes[0]; - landmark.GetHeatMapInfo().hIdx = heatMapIndexes[1]; - landmark.GetHeatMapInfo().wIdx = heatMapIndexes[2]; - } else { - landmark.GetHeatMapInfo().hIdx = heatMapIndexes[0]; - landmark.GetHeatMapInfo().wIdx = heatMapIndexes[1]; - landmark.GetHeatMapInfo().cIdx = heatMapIndexes[2]; - } - - if (json_object_has_member(object, "nms_radius")) { - landmark.GetHeatMapInfo().nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); - LOGI("nms is enabled with %3.f", landmark.GetHeatMapInfo().nmsRadius ); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - int OutputMetadata::ParseOffset(JsonObject *root) { LOGI("ENTER"); @@ -614,48 +411,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int OutputMetadata::ParseDisplacement(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "displacement")) { - LOGI("No displacement outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonArray * rootArray = json_object_get_array_member(root, "displacement"); - unsigned int elements = json_array_get_length(rootArray); - - dispVecs.resize(elements); - unsigned int elem = 0; - for (auto& disp : dispVecs) { - JsonNode *pNode = json_array_get_element(rootArray, elem++); - JsonObject *pObject = json_node_get_object(pNode); - - disp.ParseDisplacement(pObject, mSupportedShapeType); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseEdgeMap(JsonObject * root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "edgemap")) { - LOGI("No edgemap outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - edgeMap.ParseEdge(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - int OutputMetadata::Parse(JsonObject *root) { LOGI("ENTER"); @@ -676,20 +431,20 @@ namespace inference // addtional parsing is required according to decoding type if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - ret = ParseLabel(root); + ret = box.ParseLabel(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLabel[%d]", ret); return ret; } - ret = ParseNumber(root); + ret = box.ParseNumber(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetNumber[%d]", ret); return ret; } } else if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) { - ret = ParseBoxDecodeInfo(root); + ret = box.ParseDecodeInfo(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetBoxDecodeInfo[%d]", ret); return ret; @@ -714,7 +469,7 @@ namespace inference if (!landmark.GetName().empty()) { if (landmark.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - ret = ParseLandmarkDecodeInfo(root); + ret = landmark.ParseDecodeInfo(root, mSupportedShapeType); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); return ret; @@ -728,13 +483,13 @@ namespace inference return ret; } - ret = 
ParseDisplacement(root); + ret = landmark.ParseDisplacement(root, mSupportedShapeType); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetDispVector[%d]", ret); return ret; } - ret = ParseEdgeMap(root); + ret = landmark.ParseEdgeMap(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetEdgeConnection[%d]", ret); return ret; diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index a1efd2d..ca04829 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -366,9 +366,9 @@ namespace inference LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y); LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score); - int index = static_cast(mMeta.GetEdges().size()) - 1; - for (auto riter = mMeta.GetEdges().rbegin(); - riter != mMeta.GetEdges().rend(); ++riter) { + int index = static_cast(mMeta.GetLandmarkEdges().size()) - 1; + for (auto riter = mMeta.GetLandmarkEdges().rbegin(); + riter != mMeta.GetLandmarkEdges().rend(); ++riter) { int fromKeyId = riter->second; int toKeyId = riter->first; @@ -387,8 +387,8 @@ namespace inference } index = 0; - for (auto iter = mMeta.GetEdges().begin(); - iter != mMeta.GetEdges().end(); ++iter) { + for (auto iter = mMeta.GetLandmarkEdges().begin(); + iter != mMeta.GetLandmarkEdges().end(); ++iter) { int fromKeyId = iter->first; int toKeyId = iter->second; @@ -459,16 +459,16 @@ namespace inference { LOGI("ENTER"); - LOGI("edge size: %zd", mMeta.GetEdges().size()); + LOGI("edge size: %zd", mMeta.GetLandmarkEdges().size()); int idxY = index.y * mHeatMapWidth - * static_cast(mMeta.GetEdges().size()) * 2; + * static_cast(mMeta.GetLandmarkEdges().size()) * 2; - idxY += index.x * static_cast(mMeta.GetEdges().size()) * 2 + edgeId; + idxY += index.x * static_cast(mMeta.GetLandmarkEdges().size()) * 2 + edgeId; - int idxX = idxY + static_cast(mMeta.GetEdges().size()); + int idxX = idxY + static_cast(mMeta.GetLandmarkEdges().size()); - for(auto& dispVec : mMeta.GetDispVecAll()){ + for(auto& dispVec : mMeta.GetLandmarkDispVecAll()){ if (dispVec.GetType() == type) { // 0: forward LOGI("%s", dispVec.GetName().c_str()); vector.x = mTensorBuffer.getValue(dispVec.GetName(), idxX); -- 2.7.4 From cc15518fce51c6e7e289701f2ccb07faddcc29ed Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 24 Nov 2021 17:39:23 +0900 Subject: [PATCH 07/16] mv_machine_learning: add SNPE inference engine support [Version] : 0.9.0-0 [Issue type] : new feature Change-Id: Iabfc4932d808296c8941bc8bbacf88b9fbe09616 Signed-off-by: Inki Dae --- include/mv_inference_type.h | 2 ++ mv_machine_learning/mv_inference/inference/src/Inference.cpp | 7 +++++++ packaging/capi-media-vision.spec | 4 ++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h index a259b77..5a6572d 100644 --- a/include/mv_inference_type.h +++ b/include/mv_inference_type.h @@ -65,6 +65,8 @@ typedef enum { MV_INFERENCE_BACKEND_ARMNN, /**< ARMNN (Since 6.0) */ MV_INFERENCE_BACKEND_MLAPI, /**< ML Single API of NNStreamer (Since 6.0) */ MV_INFERENCE_BACKEND_ONE, /**< On-device Neural Engine (Since 6.0) */ + MV_INFERENCE_BACKEND_NNTRAINER, /**< NNTrainer (Since 7.0) */ + MV_INFERENCE_BACKEND_SNPE, /**< SNPE Engine (Since 7.0) */ MV_INFERENCE_BACKEND_MAX /**< Backend MAX */ } mv_inference_backend_type_e; diff --git 
a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 0308e49..fdd0560 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -81,6 +81,8 @@ namespace inference { LOGI("ENTER"); + // Mediavision can support several inference engines via ML Single API + // "mlapi" means that the inference backend is used via ML Single API. mSupportedInferenceBackend.insert(std::make_pair( MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false))); mSupportedInferenceBackend.insert(std::make_pair( @@ -91,6 +93,8 @@ namespace inference MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false))); mSupportedInferenceBackend.insert(std::make_pair( MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false))); + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_SNPE, std::make_pair("mlapi", false))); CheckSupportedInferenceBackend(); @@ -115,6 +119,8 @@ namespace inference std::make_pair("onnx", INFERENCE_MODEL_ONNX)); mModelFormats.insert(std::make_pair( "nb", INFERENCE_MODEL_VIVANTE)); + mModelFormats.insert(std::make_pair( + "dlc", INFERENCE_MODEL_SNPE)); LOGI("LEAVE"); } @@ -938,6 +944,7 @@ namespace inference break; case INFERENCE_MODEL_TFLITE: case INFERENCE_MODEL_TORCH: + case INFERENCE_MODEL_SNPE: models.push_back(mConfig.mWeightFilePath); break; default: diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 789842c..1af9b65 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,7 +1,7 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.8.19 -Release: 1 +Version: 0.9.0 +Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause Source0: %{name}-%{version}.tar.gz -- 2.7.4 From 1cae83e2c74cefc4e5fe1cb2bd88ad984e67c39a Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 28 Dec 2021 19:31:33 +0900 Subject: [PATCH 08/16] test/machine_learning: add snpe model support [Version] : 0.10.0-0 [Issue type] : new feature Added a test case for the SNPE engine with a dlc model.
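For reference, on the legacy (non-JSON) path the SNPE backend is selected the same way as the TFLITE one, just with a different backend type. A minimal sketch of what the new helper configures (the attribute values mirror the quantized InceptionV3 test case below; the snippet is illustrative, not a verbatim excerpt):

    // Select the SNPE backend and run it on the CPU.
    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE,
                                       MV_INFERENCE_BACKEND_SNPE);
    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE,
                                       MV_INFERENCE_TARGET_CPU);
    // Quantized .dlc models take UINT8 input tensors.
    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE,
                                       MV_INFERENCE_DATA_UINT8);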
Change-Id: I7d20f9974300130ddeaf4e8eb77482d89dee0b9d Signed-off-by: Inki Dae --- packaging/capi-media-vision.spec | 2 +- .../inference/test_face_detection.cpp | 2 +- .../inference/test_image_classification.cpp | 41 ++++++++++++++++++++++ .../inference/test_inference_helper.cpp | 28 +++++++++++++-- .../inference/test_inference_helper.hpp | 7 +++- .../inference/test_pose_landmark_detection.cpp | 2 +- 6 files changed, 75 insertions(+), 7 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 1af9b65..00147d8 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.9.0 +Version: 0.10.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/test_face_detection.cpp b/test/testsuites/machine_learning/inference/test_face_detection.cpp index 376a717..59a357f 100644 --- a/test/testsuites/machine_learning/inference/test_face_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_face_detection.cpp @@ -40,7 +40,7 @@ public: TEST_P(TestFaceDetection, CPU_TFLITE_MobilenetV1_SSD) { - engine_config_hosted_cpu_tflite(engine_cfg, + engine_config_hosted_model_config(engine_cfg, FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH, _use_json_parser); if (!_use_json_parser) { const char *inputNodeName = "normalized_input_image_tensor"; diff --git a/test/testsuites/machine_learning/inference/test_image_classification.cpp b/test/testsuites/machine_learning/inference/test_image_classification.cpp index cdda3f6..103f6df 100644 --- a/test/testsuites/machine_learning/inference/test_image_classification.cpp +++ b/test/testsuites/machine_learning/inference/test_image_classification.cpp @@ -31,6 +31,13 @@ MV_CONFIG_PATH \ "/models/IC/tflite/quant_mobilenet_v1_224x224.tflite" +#define IC_LABEL_INCEPTION_V3_299_PATH \ + MV_CONFIG_PATH \ + "/models/IC_Q/snpe/imagenet_slim_labels.txt" +#define IC_SNPE_WEIGHT_QUANT_INCEPTION_V3_299_PATH \ + MV_CONFIG_PATH \ + "/models/IC_Q/snpe/inception_v3_quantized.dlc" + void _image_classified_cb(mv_source_h source, const int number_of_classes, const int *indices, const char **names, const float *confidences, void *user_data) @@ -286,6 +293,40 @@ TEST_P(TestImageClassification, CPU_TFLITE_QUANT_MobilenetV1) inferenceBanana(); } +TEST_P(TestImageClassification, SNPE_InceptionV3_Quantized) +{ + engine_config_hosted_cpu_snpe_user_model( + engine_cfg, IC_SNPE_WEIGHT_QUANT_INCEPTION_V3_299_PATH, + IC_LABEL_INCEPTION_V3_299_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "output" }; + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_UINT8), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 299), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 299), + MEDIA_VISION_ERROR_NONE); + 
ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + + inferenceBanana(); +} + INSTANTIATE_TEST_CASE_P(Prefix, TestImageClassification, ::testing::Values( ParamTypeOne(false), diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index 81a0380..9d5c95b 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -15,7 +15,7 @@ TestInference::~TestInference() EXPECT_EQ(mv_destroy_engine_config(engine_cfg), MEDIA_VISION_ERROR_NONE); } -void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, +void engine_config_hosted_model_config(mv_engine_config_h handle, const char *tf_weight, const bool use_json_parser) { @@ -32,6 +32,14 @@ void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, handle, MV_INFERENCE_MODEL_META_FILE_PATH , meta_file_path.c_str()), MEDIA_VISION_ERROR_NONE); } +} + +void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, + const char *tf_weight, + const char *user_file, + const bool use_json_parser) +{ + engine_config_hosted_model_config(handle, tf_weight, use_json_parser); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, @@ -41,14 +49,28 @@ void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU), MEDIA_VISION_ERROR_NONE); + + EXPECT_EQ(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), + MEDIA_VISION_ERROR_NONE); } -void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, +void engine_config_hosted_cpu_snpe_user_model(mv_engine_config_h handle, const char *tf_weight, const char *user_file, const bool use_json_parser) { - engine_config_hosted_cpu_tflite(handle, tf_weight, use_json_parser); + engine_config_hosted_model_config(handle, tf_weight, use_json_parser); + + EXPECT_EQ(mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_SNPE), + MEDIA_VISION_ERROR_NONE); + EXPECT_EQ(mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU), + MEDIA_VISION_ERROR_NONE); + EXPECT_EQ(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), MEDIA_VISION_ERROR_NONE); diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.hpp b/test/testsuites/machine_learning/inference/test_inference_helper.hpp index a04fb00..3023d81 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.hpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.hpp @@ -23,7 +23,7 @@ public: mv_source_h mv_source; }; -void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, +void engine_config_hosted_model_config(mv_engine_config_h handle, const char *tf_weight, const bool use_json_parser); @@ -32,4 +32,9 @@ void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, const char *user_file, const bool use_json_parser); +void engine_config_hosted_cpu_snpe_user_model(mv_engine_config_h handle, 
+ const char *tf_weight, + const char *user_file, + const bool use_json_parser); + #endif //__TEST_INFERENCE_HELPER_HPP__ diff --git a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp index 58c4b43..623903a 100644 --- a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp @@ -39,7 +39,7 @@ public: TEST_P(TestPoseLandmarkDetection, CPU_TFLITE_MobilenetV1) { - engine_config_hosted_cpu_tflite( + engine_config_hosted_model_config( engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH, _use_json_parser); if (!_use_json_parser) { -- 2.7.4 From a32be06de5a36e685309e056c556fd9b135a8786 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 31 Dec 2021 10:50:53 +0900 Subject: [PATCH 09/16] mv_machine_learning: deprecate MLAPI and MAX types [Version] : 0.11.0-0 [Issue type] : cleanup Deprecated the MV_INFERENCE_BACKEND_MLAPI and MV_INFERENCE_BACKEND_MAX types. MV_INFERENCE_BACKEND_MLAPI is not a backend type but a separate API framework, and MV_INFERENCE_BACKEND_MAX can break binary compatibility when a new enumeration value is added, because the MAX value then differs from the one an already-built binary was compiled against. So drop these two types. Change-Id: I391cd0d4b713e3d35fe263f1567f4cea3df60630 Signed-off-by: Inki Dae --- include/mv_inference_type.h | 6 +++--- .../mv_inference/inference/src/mv_inference_open.cpp | 2 ++ packaging/capi-media-vision.spec | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h index 5a6572d..1794b83 100644 --- a/include/mv_inference_type.h +++ b/include/mv_inference_type.h @@ -48,7 +48,7 @@ extern "C" { * run efficiently without modification on Embedded hardware. * (https://developer.arm.com/ip-products/processors/machine-learning/arm-nn) * #MV_INFERENCE_BACKEND_MLAPI Samsung-introduced open source ML single API framework of NNStreamer, which - * runs various NN models via tensor filters of NNStreamer. + * runs various NN models via tensor filters of NNStreamer. (Deprecated since 7.0) * (https://github.com/nnstreamer/nnstreamer) * #MV_INFERENCE_BACKEND_ONE Samsung-introduced open source inference engine called On-device Neural Engine, which * performs inference of a given NN model on various devices such as CPU, GPU, DSP and NPU.
@@ -63,11 +63,11 @@ typedef enum { MV_INFERENCE_BACKEND_OPENCV, /**< OpenCV */ MV_INFERENCE_BACKEND_TFLITE, /**< TensorFlow-Lite */ MV_INFERENCE_BACKEND_ARMNN, /**< ARMNN (Since 6.0) */ - MV_INFERENCE_BACKEND_MLAPI, /**< ML Single API of NNStreamer (Since 6.0) */ + MV_INFERENCE_BACKEND_MLAPI, /**< @deprecated ML Single API of NNStreamer (Deprecated since 7.0) */ MV_INFERENCE_BACKEND_ONE, /**< On-device Neural Engine (Since 6.0) */ MV_INFERENCE_BACKEND_NNTRAINER, /**< NNTrainer (Since 7.0) */ MV_INFERENCE_BACKEND_SNPE, /**< SNPE Engine (Since 7.0) */ - MV_INFERENCE_BACKEND_MAX /**< Backend MAX */ + MV_INFERENCE_BACKEND_MAX /**< @deprecated Backend MAX (Deprecated since 7.0) */ } mv_inference_backend_type_e; /** diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index a9fd490..5faa3ad 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -125,6 +125,8 @@ static bool IsValidBackendType(const int backend_type) static bool IsConfigFilePathRequired(const int target_device_type, const int backend_type) { + LOGW("DEPRECATION WARNING : MV_INFERENCE_BACKEND_MLAPI type is deprecated and will be removed from next release."); + // In case of MV_INFERENCE_TARGET_DEVICE_CUSTOM via MLAPI backend, config file path is required. return (backend_type == MV_INFERENCE_BACKEND_MLAPI && target_device_type & MV_INFERENCE_TARGET_DEVICE_CUSTOM); diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 00147d8..50fcffc 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.10.0 +Version: 0.11.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause -- 2.7.4 From 5f741065916c718fd6a4b30f5532552cf6ef5d44 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Mon, 20 Dec 2021 18:01:24 +0900 Subject: [PATCH 10/16] inference: support movenet [Version] 0.12.0 [Issue type] new feature Movenet models in the Lightning3/4 and Thunder3/4 variants are supported. The models can be downloaded from tfhub.dev, and meta files for them are added as examples. In addition, the testsuite is updated with those Movenet models.
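The new BYPASS_MULTICHANNEL decoding type reads keypoints straight out of the output tensor, which packs a [y, x, score] triplet per landmark (hence "landmark_offset" : 3 in the meta files). A condensed sketch of the decode loop, with names shortened for brevity (the full version is in the PoseDecoder.cpp hunk below):

    // Each landmark occupies `offset` consecutive floats: y, x, score.
    for (int idx = 0; idx < numberOfLandmarks; ++idx) {
        float y     = buffer.getValue<float>(name, idx * offset);
        float x     = buffer.getValue<float>(name, idx * offset + 1);
        float score = buffer.getValue<float>(name, idx * offset + 2);

        // Coordinates come out in input-tensor scale; map them back to the
        // source image ratio before reporting the landmark.
        landmarks[idx].decodedLoc = cv::Point2f(x / scaleWidth, y / scaleHeight);
        landmarks[idx].score = score;
    }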
Change-Id: I1c526cba688842e408fad1c84ba7dff4d0320e80 Signed-off-by: Tae-Young Chung --- meta-template/pld_movenet_lightning3_192x192.json | 46 ++++++++++++ .../pld_movenet_lightning3_192x192_int8_quant.json | 46 ++++++++++++ meta-template/pld_movenet_lightning4_192x192.json | 46 ++++++++++++ .../pld_movenet_lightning4_192x192_int8_quant.json | 46 ++++++++++++ meta-template/pld_movenet_thunder3_256x256.json | 46 ++++++++++++ .../pld_movenet_thunder3_256x256_int8_quant.json | 46 ++++++++++++ meta-template/pld_movenet_thunder4_256x256.json | 46 ++++++++++++ .../pld_movenet_thunder4_256x256_int8_quant.json | 46 ++++++++++++ .../mv_inference/inference/include/Landmark.h | 1 + .../inference/include/OutputMetadataTypes.h | 1 + .../mv_inference/inference/src/Inference.cpp | 7 +- .../mv_inference/inference/src/OutputMetadata.cpp | 3 +- .../mv_inference/inference/src/PoseDecoder.cpp | 24 ++++++- packaging/capi-media-vision.spec | 2 +- .../inference/inference_test_suite.c | 84 ++++++++++++++++++++++ 15 files changed, 484 insertions(+), 6 deletions(-) create mode 100644 meta-template/pld_movenet_lightning3_192x192.json create mode 100644 meta-template/pld_movenet_lightning3_192x192_int8_quant.json create mode 100644 meta-template/pld_movenet_lightning4_192x192.json create mode 100644 meta-template/pld_movenet_lightning4_192x192_int8_quant.json create mode 100644 meta-template/pld_movenet_thunder3_256x256.json create mode 100644 meta-template/pld_movenet_thunder3_256x256_int8_quant.json create mode 100644 meta-template/pld_movenet_thunder4_256x256.json create mode 100644 meta-template/pld_movenet_thunder4_256x256_int8_quant.json diff --git a/meta-template/pld_movenet_lightning3_192x192.json b/meta-template/pld_movenet_lightning3_192x192.json new file mode 100644 index 0000000..f40c6ff --- /dev/null +++ b/meta-template/pld_movenet_lightning3_192x192.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_lightning3_192x192_int8_quant.json b/meta-template/pld_movenet_lightning3_192x192_int8_quant.json new file mode 100644 index 0000000..f40c6ff --- /dev/null +++ b/meta-template/pld_movenet_lightning3_192x192_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : 
"BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_lightning4_192x192.json b/meta-template/pld_movenet_lightning4_192x192.json new file mode 100644 index 0000000..bcbf9d0 --- /dev/null +++ b/meta-template/pld_movenet_lightning4_192x192.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input_0:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_lightning4_192x192_int8_quant.json b/meta-template/pld_movenet_lightning4_192x192_int8_quant.json new file mode 100644 index 0000000..bcbf9d0 --- /dev/null +++ b/meta-template/pld_movenet_lightning4_192x192_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input_0:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder3_256x256.json b/meta-template/pld_movenet_thunder3_256x256.json new file mode 100644 index 0000000..5d22c4a --- /dev/null +++ b/meta-template/pld_movenet_thunder3_256x256.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder3_256x256_int8_quant.json b/meta-template/pld_movenet_thunder3_256x256_int8_quant.json new file mode 100644 index 0000000..5d22c4a --- /dev/null +++ b/meta-template/pld_movenet_thunder3_256x256_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + 
], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder4_256x256.json b/meta-template/pld_movenet_thunder4_256x256.json new file mode 100644 index 0000000..d28d5c7 --- /dev/null +++ b/meta-template/pld_movenet_thunder4_256x256.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder4_256x256_int8_quant.json b/meta-template/pld_movenet_thunder4_256x256_int8_quant.json new file mode 100644 index 0000000..d28d5c7 --- /dev/null +++ b/meta-template/pld_movenet_thunder4_256x256_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index 26dda7f..4aae027 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -99,6 +99,7 @@ namespace inference supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); + supportedLandmarkDecodingTypes.insert({"BYPASS_MULTICHANNEL", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL}); supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); } diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h 
b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h index 440fa76..7ce558b 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h @@ -67,6 +67,7 @@ namespace inference typedef enum { INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, + INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL, INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP, INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE } inference_landmark_decoding_type_e; diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index fdd0560..8cb63c8 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1542,6 +1542,8 @@ namespace inference LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset(); + } else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]; } else { heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; @@ -1635,7 +1637,8 @@ namespace inference int heatMapHeight = 0; int heatMapChannel = 0; - if (outputMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; @@ -1652,6 +1655,8 @@ namespace inference if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset(); + else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) + landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]; poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? 
landmarkChannel : static_cast(mUserListName.size()); diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 738116f..391b265 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -468,7 +468,8 @@ namespace inference } if (!landmark.GetName().empty()) { - if (landmark.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { ret = landmark.ParseDecodeInfo(root, mSupportedShapeType); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index ca04829..e1596aa 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -56,7 +56,8 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || + mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { LOGI("Skip init"); return MEDIA_VISION_ERROR_NONE; } @@ -245,14 +246,16 @@ namespace inference mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mPoseLandmarks.resize(1); - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || + mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); } else { mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); } } - if (mMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { while (!mCandidates.empty()) { LandmarkPoint &root = mCandidates.front(); @@ -311,6 +314,21 @@ namespace inference for (auto& pose : mPoseLandmarks) { pose.score /= static_cast(mHeatMapChannel); } + } else if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + int landmarkOffset = mMeta.GetLandmarkOffset(); + for (int idx = 0; idx < mNumberOfLandmarks; ++idx) { + float py = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset); + float px = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset + 1); + float pscore = mTensorBuffer.getValue(mMeta.GetScoreName(), idx * landmarkOffset + 2); + + mPoseLandmarks[0].landmarks[idx].score = pscore; + mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1); + mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px / scaleWidth, py / scaleHeight); + mPoseLandmarks[0].landmarks[idx].id = idx; + mPoseLandmarks[0].landmarks[idx].valid = true; + + LOGI("idx[%d]: %.4f, %.4f, score: %.4f", idx, px, py, pscore); + } } else { // multi pose is not supported std::vector scoreIndexes = mMeta.GetScoreDimInfo().GetValidIndexAll(); diff --git a/packaging/capi-media-vision.spec 
b/packaging/capi-media-vision.spec index 50fcffc..2aeff26 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.11.0 +Version: 0.12.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 188524d..681afab 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -224,6 +224,42 @@ #define PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld_int8_movenet.tflite" +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER3_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING3_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192.json" + +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER3_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256_int8_quant.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192_int8_quant.json" + +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER4_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING4_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192.json" + +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER4_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256_int8_quant.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192_int8_quant.json" + #define TASK_IC 0 #define TASK_OD 1 #define TASK_FD 2 @@ -1622,6 +1658,14 @@ int perform_pose_landmark_detection() "Hosted[o]: TFLITE(CPU) + CPM", "Hosted[o]: TFLITE(CPU) + MOBILENET_V1_POSENET", "Hosted[x]: TFLITE(CPU) + 
INT8_MOVENET", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_THUNDER3", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_LIGHTNING3", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_THUNDER3", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_LIGHTNING3", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_THUNDER4", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_LIGHTNING4", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_THUNDER4", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_LIGHTNING4", }; int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); @@ -1655,6 +1699,46 @@ int perform_pose_landmark_detection() engine_cfg, PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH, NULL); } break; + case 5: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_THUNDER3_256_FLOAT32_PATH); + } break; + case 6: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING3_192_FLOAT32_PATH); + } break; + case 7: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_THUNDER3_256_INT8_QUANT_PATH); + } break; + case 8: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH); + } break; + case 9: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_THUNDER4_256_FLOAT32_PATH); + } break; + case 10: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING4_192_FLOAT32_PATH); + } break; + case 11: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_THUNDER4_256_INT8_QUANT_PATH); + } break; + case 12: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH); + } break; } if (err != MEDIA_VISION_ERROR_NONE) { printf("Fail to perform config [err:%i]\n", err); -- 2.7.4 From 53b0a9274aae3b2d06b27e62f3b0b1070a3a0423 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 11 Jan 2022 20:26:14 +0900 Subject: [PATCH 11/16] test/machine_learning: fix device attribute type [Version] : 0.12.1-0 [Issue type] : bug fix Fixed device attribute type. Legacy device attribute type has been deprecated so use new one. 
Change-Id: Ia31a5be01595e6d39f3dff783e5b4d0982987e86 Signed-off-by: Inki Dae --- packaging/capi-media-vision.spec | 2 +- .../machine_learning/inference/test_inference_helper.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 2aeff26..8af3d73 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.0 +Version: 0.12.1 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index 9d5c95b..c041f48 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -46,8 +46,8 @@ void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, MV_INFERENCE_BACKEND_TFLITE), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU), + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_string_attribute( @@ -67,11 +67,11 @@ void engine_config_hosted_cpu_snpe_user_model(mv_engine_config_h handle, MV_INFERENCE_BACKEND_SNPE), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU), + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), MEDIA_VISION_ERROR_NONE); -} \ No newline at end of file +} -- 2.7.4 From af72d3db5f2a1894756ba92a9427f5bd560aac91 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Wed, 19 Jan 2022 23:15:07 -0500 Subject: [PATCH 12/16] packaging: Change gcov object install path [Version] : 0.12.2-0 [Issue type] : feature request Change the gcov install path by appending the package name, for a better directory structure. Change-Id: Icf586a264e1f8a37db31c134e5e2ea9f82968e21 Signed-off-by: Kwang Son --- packaging/capi-media-vision.spec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 8af3d73..70a2bc1 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.1 +Version: 0.12.2 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause @@ -229,8 +229,8 @@ cp media-vision-config.json %{buildroot}%{_datadir}/%{name}/ %make_install %if 0%{?gcov:1} -mkdir -p %{buildroot}%{_datadir}/gcov/obj -install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj +mkdir -p %{buildroot}%{_datadir}/gcov/obj/%{name} +install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj/%{name} %endif %post -p /sbin/ldconfig -- 2.7.4 From e3ebe78d9e3ac4512fca897da7c1e0eedf4752cc Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Fri, 21 Jan 2022 09:28:32 +0900 Subject: [PATCH 13/16] face/image: Add opencv's tracking header files [Version] 0.12.3-0 [Issue type] Update From OpenCV 4.5.3, the tracking module has been moved to contrib, and some trackers have been moved to the legacy API.
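Since the contrib split, the median-flow tracker lives in the legacy namespace of the tracking module, so code built against the old cv::TrackerMedianFlow needs the extra header and namespace. A rough sketch (assuming the opencv_contrib tracking headers are installed):

    // Legacy tracking API location since OpenCV 4.5.3.
    #include <opencv2/tracking/tracking_legacy.hpp>

    cv::Ptr<cv::legacy::TrackerMedianFlow> tracker =
            cv::legacy::TrackerMedianFlow::create();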
Change-Id: Ibdfcc89abf5f10a8d874da81ab3fc4601cec5423 Signed-off-by: Tae-Young Chung --- mv_face/face/include/FaceTracker.h | 3 ++- mv_image/image/include/Tracking/ObjectTracker.h | 1 + mv_surveillance/surveillance/include/MFTracker.h | 1 + packaging/capi-media-vision.spec | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/mv_face/face/include/FaceTracker.h b/mv_face/face/include/FaceTracker.h index 5266fea..210cc41 100644 --- a/mv_face/face/include/FaceTracker.h +++ b/mv_face/face/include/FaceTracker.h @@ -44,6 +44,7 @@ #include #include +#include namespace cv { @@ -59,7 +60,7 @@ by authors to outperform MIL). During the implementation period the code at , the courtesy of the author Arthur Amarra, was used for the reference purpose. */ -class FaceTracker : public TrackerMedianFlow { +class FaceTracker : public legacy::TrackerMedianFlow { public: struct Params { /** diff --git a/mv_image/image/include/Tracking/ObjectTracker.h b/mv_image/image/include/Tracking/ObjectTracker.h index e3e6894..9486b51 100644 --- a/mv_image/image/include/Tracking/ObjectTracker.h +++ b/mv_image/image/include/Tracking/ObjectTracker.h @@ -18,6 +18,7 @@ #define __MEDIA_VISION_OBJECTTRACKER_H__ #include +#include namespace MediaVision { namespace Image { diff --git a/mv_surveillance/surveillance/include/MFTracker.h b/mv_surveillance/surveillance/include/MFTracker.h index 4a58714..b93267a 100644 --- a/mv_surveillance/surveillance/include/MFTracker.h +++ b/mv_surveillance/surveillance/include/MFTracker.h @@ -18,6 +18,7 @@ #define __MEDIA_VISION_MFTRACKER_H__ #include +#include namespace mediavision { namespace surveillance { diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 70a2bc1..7bb6988 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.2 +Version: 0.12.3 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause -- 2.7.4 From da602b92861bfa60532443f68a2e46f6c43e9993 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Tue, 22 Feb 2022 15:30:50 +0900 Subject: [PATCH 14/16] Fix bugs [Version] 0.12.4-0 [Issue type] bug fix 1. Fix INVARIANT_RESULT.OP_ZERO - the return value of GetLandmarkType() is limited to the landmark type enumeration, so range-checking it is meaningless 2. Check for NULL after malloc() 3. Initialize member variable in the class ctor
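The malloc() checks added for item 2 follow the pattern sketched below (a sketch only; variable names are illustrative, while the error code and LOGE logging mirror the hunks that follow):

    size_t number_of_labels = ...; /* illustrative count */
    int *labels = (int *)malloc(sizeof(int) * number_of_labels);
    if (labels == NULL) {
        LOGE("Fail to alloc memory for %zu labels", number_of_labels);
        return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
    }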
Change-Id: I0e23298ffcb7760eb86e24e37347eae5e992e421 Signed-off-by: Tae-Young Chung --- mv_face/face/src/mv_face_open.cpp | 22 ++++++++++++---------- .../mv_inference/inference/src/PoseDecoder.cpp | 6 ------ packaging/capi-media-vision.spec | 2 +- test/testsuites/image/image_test_suite.c | 4 ++++ .../inference/test_inference_helper.cpp | 3 ++- .../surveillance/surveillance_test_suite.c | 4 ++++ 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/mv_face/face/src/mv_face_open.cpp b/mv_face/face/src/mv_face_open.cpp index 443fd71..fb4496b 100644 --- a/mv_face/face/src/mv_face_open.cpp +++ b/mv_face/face/src/mv_face_open.cpp @@ -694,19 +694,21 @@ int mv_face_recognition_model_query_labels_open( static_cast(recognition_model); const std::set& learnedLabels = pRecModel->getFaceLabels(); - *number_of_labels = learnedLabels.size(); - - if ((*number_of_labels)) { - (*labels) = (int*)malloc(sizeof(int) * (*number_of_labels)); - - std::set::const_iterator it = learnedLabels.begin(); - int i = 0; - for (; it != learnedLabels.end(); ++it) { - (*labels)[i] = *it; - ++i; + auto _number_of_labels = learnedLabels.size(); + int *_pLabels = NULL; + if (_number_of_labels) { + _pLabels = (int*)malloc(sizeof(int) * (_number_of_labels)); + if (_pLabels == NULL) { + LOGE("Fail to alloc memory for %zu labels", _number_of_labels); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; } + + std::copy(learnedLabels.begin(), learnedLabels.end(), _pLabels); } + *number_of_labels = _number_of_labels; + *labels = _pLabels; + LOGD("List of the labels learned by the recognition model has been retrieved"); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index e1596aa..09c10d8 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -50,12 +50,6 @@ namespace inference { LOGI("ENTER"); - if (mMeta.GetLandmarkType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || - mMeta.GetLandmarkType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { - LOGE("Not supported landmark type"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { LOGI("Skip init"); diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 7bb6988..a8cd2a9 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.3 +Version: 0.12.4 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/image/image_test_suite.c b/test/testsuites/image/image_test_suite.c index f1ff0de..a18ef5a 100644 --- a/test/testsuites/image/image_test_suite.c +++ b/test/testsuites/image/image_test_suite.c @@ -1742,6 +1742,10 @@ void perform_recognition_cases(GArray *image_objects) } mv_image_object_h *objects_pool = malloc(sizeof(mv_image_object_h) * image_objects->len); + if (objects_pool == NULL) { + printf("\nFail to alloc memory for %u objects.\n", image_objects->len); + break; + } int index = 0; for (; index < image_objects->len; ++index) objects_pool[index] = g_array_index(image_objects, testing_object_h, index)->entity; diff --git
a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index c041f48..ac427e6 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -2,7 +2,8 @@ #include #include "test_inference_helper.hpp" -TestInference::TestInference() +TestInference::TestInference() : + _use_json_parser(false) { EXPECT_EQ(mv_create_engine_config(&engine_cfg), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_inference_create(&infer), MEDIA_VISION_ERROR_NONE); diff --git a/test/testsuites/surveillance/surveillance_test_suite.c b/test/testsuites/surveillance/surveillance_test_suite.c index 0a86b98..5b7810c 100644 --- a/test/testsuites/surveillance/surveillance_test_suite.c +++ b/test/testsuites/surveillance/surveillance_test_suite.c @@ -454,6 +454,10 @@ void add_roi_to_event(mv_surveillance_event_trigger_h event_trigger) PRINT_R("Incorrect input! Try again."); mv_point_s* roi = (mv_point_s*) malloc(sizeof(mv_point_s) * number_of_roi_points); + if (roi == NULL) { + PRINT_E("Fail to alloc roi err[%d].", MEDIA_VISION_ERROR_OUT_OF_MEMORY); + return; + } int x = 0; int y = 0; -- 2.7.4 From f5c8d848e03d7a629520c762a2785de2a96238af Mon Sep 17 00:00:00 2001 From: sangho park Date: Tue, 29 Mar 2022 11:07:41 +0900 Subject: [PATCH 15/16] fix cmake_minimum_required() deprecation warning. [Version] : 0.12.5-0 [Issue type] : Bug fix Change-Id: I6b178d31b204ebdc38860bd82ed942867782ee0f Signed-off-by: sangho park --- CMakeLists.txt | 2 +- mv_barcode/barcode_detector/CMakeLists.txt | 2 +- mv_barcode/barcode_generator/CMakeLists.txt | 2 +- mv_common/CMakeLists.txt | 2 +- mv_face/face/CMakeLists.txt | 2 +- mv_image/CMakeLists.txt | 2 +- mv_image/image/CMakeLists.txt | 2 +- mv_machine_learning/mv_inference/inference/CMakeLists.txt | 2 +- mv_surveillance/surveillance/CMakeLists.txt | 2 +- packaging/capi-media-vision.spec | 2 +- test/CMakeLists.txt | 2 +- test/assessment/CMakeLists.txt | 2 +- test/assessment/barcode/CMakeLists.txt | 2 +- test/assessment/face/CMakeLists.txt | 2 +- test/assessment/surveillance/CMakeLists.txt | 2 +- test/testsuites/CMakeLists.txt | 2 +- test/testsuites/common/image_helper/CMakeLists.txt | 2 +- test/testsuites/common/testsuite_common/CMakeLists.txt | 2 +- test/testsuites/common/video_helper/CMakeLists.txt | 2 +- test/testsuites/face/CMakeLists.txt | 2 +- test/testsuites/image/CMakeLists.txt | 2 +- test/testsuites/machine_learning/CMakeLists.txt | 2 +- test/testsuites/machine_learning/inference/CMakeLists.txt | 2 +- test/testsuites/surveillance/CMakeLists.txt | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9421c35..fb5b84e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ set(fw_name "capi-media-vision") project(${fw_name}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) option(BUILD_ML_ONLY "Build mv_machine_learning only" OFF) diff --git a/mv_barcode/barcode_detector/CMakeLists.txt b/mv_barcode/barcode_detector/CMakeLists.txt index 8840abc..b67afb1 100644 --- a/mv_barcode/barcode_detector/CMakeLists.txt +++ b/mv_barcode/barcode_detector/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_BARCODE_DETECTOR_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_BARCODE_DET_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git
a/mv_barcode/barcode_generator/CMakeLists.txt b/mv_barcode/barcode_generator/CMakeLists.txt index ca40696..099ae2c 100644 --- a/mv_barcode/barcode_generator/CMakeLists.txt +++ b/mv_barcode/barcode_generator/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_BARCODE_GENERATOR_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_BARCODE_GEN_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_common/CMakeLists.txt b/mv_common/CMakeLists.txt index 9fd4d67..b802adf 100644 --- a/mv_common/CMakeLists.txt +++ b/mv_common/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_COMMON_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_COMMON_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_face/face/CMakeLists.txt b/mv_face/face/CMakeLists.txt index 74c0dce..3c30f7c 100644 --- a/mv_face/face/CMakeLists.txt +++ b/mv_face/face/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_FACE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_FACE_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_image/CMakeLists.txt b/mv_image/CMakeLists.txt index 2fe1e5d..12ea46f 100644 --- a/mv_image/CMakeLists.txt +++ b/mv_image/CMakeLists.txt @@ -1,4 +1,4 @@ project(mv_image_port) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_subdirectory(${PROJECT_SOURCE_DIR}/image) diff --git a/mv_image/image/CMakeLists.txt b/mv_image/image/CMakeLists.txt index e51e9a2..b0ff1c9 100644 --- a/mv_image/image/CMakeLists.txt +++ b/mv_image/image/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_IMAGE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB_RECURSE MV_IMAGE_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_machine_learning/mv_inference/inference/CMakeLists.txt b/mv_machine_learning/mv_inference/inference/CMakeLists.txt index cd20e57..41d6816 100644 --- a/mv_machine_learning/mv_inference/inference/CMakeLists.txt +++ b/mv_machine_learning/mv_inference/inference/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_INFERENCE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common iniparser json-glib-1.0) file(GLOB MV_INFERENCE_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") diff --git a/mv_surveillance/surveillance/CMakeLists.txt b/mv_surveillance/surveillance/CMakeLists.txt index 4b8c191..30c61f2 100644 --- a/mv_surveillance/surveillance/CMakeLists.txt +++ b/mv_surveillance/surveillance/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_SURVEILLANCE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_SURVEILLANCE_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index a8cd2a9..07dd13d 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.4 +Version: 0.12.5 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7008d14..b33a710 100644 ---
a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,7 +2,7 @@ add_subdirectory(assessment) add_subdirectory(testsuites) project(mv_test) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} testsuites/barcode/test_barcode.cpp diff --git a/test/assessment/CMakeLists.txt b/test/assessment/CMakeLists.txt index aae4038..0b70a03 100644 --- a/test/assessment/CMakeLists.txt +++ b/test/assessment/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_test_assessment) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_subdirectory(${PROJECT_SOURCE_DIR}/barcode) add_subdirectory(${PROJECT_SOURCE_DIR}/face) diff --git a/test/assessment/barcode/CMakeLists.txt b/test/assessment/barcode/CMakeLists.txt index fbcb2dc..e12f939 100644 --- a/test/assessment/barcode/CMakeLists.txt +++ b/test/assessment/barcode/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_barcode_assessment) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} assessment_barcode.cpp) target_link_libraries(${PROJECT_NAME} ${MV_BARCODE_DETECTOR_LIB_NAME} diff --git a/test/assessment/face/CMakeLists.txt b/test/assessment/face/CMakeLists.txt index cf203b9..464acb6 100644 --- a/test/assessment/face/CMakeLists.txt +++ b/test/assessment/face/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_face_assessment) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) find_package(OpenCV REQUIRED imgcodecs) if(NOT OpenCV_FOUND) diff --git a/test/assessment/surveillance/CMakeLists.txt b/test/assessment/surveillance/CMakeLists.txt index c7a30bd..7fa3163 100644 --- a/test/assessment/surveillance/CMakeLists.txt +++ b/test/assessment/surveillance/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) project(mv_surveillance_assessment) find_package(OpenCV REQUIRED videoio xfeatures2d) diff --git a/test/testsuites/CMakeLists.txt b/test/testsuites/CMakeLists.txt index bc195d7..1ea5af5 100644 --- a/test/testsuites/CMakeLists.txt +++ b/test/testsuites/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_testsuites) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) set(INC_IMAGE_HELPER "${PROJECT_SOURCE_DIR}/common/image_helper/include") set(INC_VIDEO_HELPER "${PROJECT_SOURCE_DIR}/common/video_helper") diff --git a/test/testsuites/common/image_helper/CMakeLists.txt b/test/testsuites/common/image_helper/CMakeLists.txt index be4ec3f..4a8a008 100644 --- a/test/testsuites/common/image_helper/CMakeLists.txt +++ b/test/testsuites/common/image_helper/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_image_helper) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_IMAGE_HELPER_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") diff --git a/test/testsuites/common/testsuite_common/CMakeLists.txt b/test/testsuites/common/testsuite_common/CMakeLists.txt index a7ca583..f93c508 100644 --- a/test/testsuites/common/testsuite_common/CMakeLists.txt +++ b/test/testsuites/common/testsuite_common/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_testsuite_common) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_library(${PROJECT_NAME} SHARED mv_testsuite_common.c) target_link_libraries(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} mv_image_helper) diff --git a/test/testsuites/common/video_helper/CMakeLists.txt b/test/testsuites/common/video_helper/CMakeLists.txt index
22296ee..43331fb 100644 --- a/test/testsuites/common/video_helper/CMakeLists.txt +++ b/test/testsuites/common/video_helper/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_video_helper) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0 capi-media-tool dlog) diff --git a/test/testsuites/face/CMakeLists.txt b/test/testsuites/face/CMakeLists.txt index ce57d1d..2e111e7 100644 --- a/test/testsuites/face/CMakeLists.txt +++ b/test/testsuites/face/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_face_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} face_test_suite.c) target_link_libraries(${PROJECT_NAME} ${MV_FACE_LIB_NAME} diff --git a/test/testsuites/image/CMakeLists.txt b/test/testsuites/image/CMakeLists.txt index aa2ecfd..6b20bce 100644 --- a/test/testsuites/image/CMakeLists.txt +++ b/test/testsuites/image/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_image_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(GLIB_PKG glib-2.0) diff --git a/test/testsuites/machine_learning/CMakeLists.txt b/test/testsuites/machine_learning/CMakeLists.txt index 57785be..5b21eb4 100644 --- a/test/testsuites/machine_learning/CMakeLists.txt +++ b/test/testsuites/machine_learning/CMakeLists.txt @@ -1,4 +1,4 @@ project(machine_learning) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_subdirectory(${PROJECT_SOURCE_DIR}/inference) diff --git a/test/testsuites/machine_learning/inference/CMakeLists.txt b/test/testsuites/machine_learning/inference/CMakeLists.txt index cd480a7..0aabbe4 100644 --- a/test/testsuites/machine_learning/inference/CMakeLists.txt +++ b/test/testsuites/machine_learning/inference/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_infer_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} inference_test_suite.c) diff --git a/test/testsuites/surveillance/CMakeLists.txt b/test/testsuites/surveillance/CMakeLists.txt index d87d645..1d731a7 100644 --- a/test/testsuites/surveillance/CMakeLists.txt +++ b/test/testsuites/surveillance/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_surveillance_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} surveillance_test_suite.c) target_link_libraries(${PROJECT_NAME} ${MV_SURVEILLANCE_LIB_NAME} -- 2.7.4 From 2728d7dd19a92dc67eac29255935086da314b275 Mon Sep 17 00:00:00 2001 From: sangho park Date: Mon, 9 May 2022 14:53:43 +0900 Subject: [PATCH 16/16] clean up manual file copy [Version] 0.12.6-0 [Issue type] cleanup Delete the manual copy commands from the spec file and implement the install step in CMakeLists.txt instead. Change-Id: I75841678480972f2a270b84db6fc533e006e1d5d Signed-off-by: sangho park --- CMakeLists.txt | 2 ++ packaging/capi-media-vision.spec | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb5b84e..56672ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,3 +123,5 @@ configure_file( @ONLY ) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-inference.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig) + +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/media-vision-config.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name}) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 07dd13d..22b0ee5
100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.5 +Version: 0.12.6 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause @@ -222,10 +222,6 @@ find . -name '*.gcno' -exec cp '{}' gcov-obj ';' %endif %install -rm -rf %{buildroot} -mkdir -p %{buildroot}%{_datadir}/%{name} -cp media-vision-config.json %{buildroot}%{_datadir}/%{name}/ - %make_install %if 0%{?gcov:1} -- 2.7.4