machine_learning: update YOLO-V5 inference 61/271161/4
authorTae-Young Chung <ty83.chung@samsung.com>
Wed, 16 Feb 2022 07:01:05 +0000 (16:01 +0900)
committerTae-Young Chung <ty83.chung@samsung.com>
Mon, 21 Feb 2022 08:55:50 +0000 (17:55 +0900)
[Version] 0.15.1-0
[Issue type] Update

Yolo-V5's output is 1x10647x85 when the input size is [416, 416]. However, a dlc (snpe) converted from onnx has a 1x13x13x255 output without anchor box processing.
This commit includes a patch to handle those kinds of models, even though the results are weird.

Change-Id: I5d53f72bc3896b92a5a59442190e2767887fd22d
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
16 files changed:
meta-template/yolov5m-fp32-416.json [new file with mode: 0644]
meta-template/yolov5m-quant-416.json [new file with mode: 0644]
meta-template/yolov5m.json [new file with mode: 0644]
meta-template/yolov5m_quantize.json [new file with mode: 0644]
meta-template/yolov5s-fp32-416.json [new file with mode: 0644]
meta-template/yolov5s-quant-416.json [new file with mode: 0644]
meta-template/yolov5s.json [new file with mode: 0644]
meta-template/yolov5s_quantize.json [new file with mode: 0644]
mv_machine_learning/mv_inference/inference/include/DecodeInfo.h
mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h
mv_machine_learning/mv_inference/inference/src/Inference.cpp
mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp
mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp
packaging/capi-media-vision.spec
test/testsuites/machine_learning/inference/test_object_detection.cpp

diff --git a/meta-template/yolov5m-fp32-416.json b/meta-template/yolov5m-fp32-416.json
new file mode 100644 (file)
index 0000000..19dc9ec
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "input_1",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [0.0, 0.0, 0.0],
+                        "std" : [255.0, 255.0, 255.0]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "NORMAL"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [8, 16, 32],
+                        "offset_scales": 1,
+                        "type" : "NORMAL"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5m-quant-416.json b/meta-template/yolov5m-quant-416.json
new file mode 100644 (file)
index 0000000..5e093b1
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "input_1",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [0.0, 0.0, 0.0],
+                        "std" : [255.0, 255.0, 255.0]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "NORMAL"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [8, 16, 32],
+                        "offset_scales" : 1,
+                        "type" : "NORMAL"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5m.json b/meta-template/yolov5m.json
new file mode 100644 (file)
index 0000000..476363f
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "images",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [127.5, 127.5, 127.5],
+                        "std" : [127.5, 127.5, 127.5]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Conv_356",
+                "index" : [-1, -1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "SIGMOID"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Conv_356",
+                "index" : [-1, -1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [32],
+                        "offset_scales" : 3,
+                        "type" : "SIGMOID"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5m_quantize.json b/meta-template/yolov5m_quantize.json
new file mode 100644 (file)
index 0000000..476363f
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "images",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [127.5, 127.5, 127.5],
+                        "std" : [127.5, 127.5, 127.5]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Conv_356",
+                "index" : [-1, -1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "SIGMOID"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Conv_356",
+                "index" : [-1, -1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [32],
+                        "offset_scales" : 3,
+                        "type" : "SIGMOID"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5s-fp32-416.json b/meta-template/yolov5s-fp32-416.json
new file mode 100644 (file)
index 0000000..19dc9ec
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "input_1",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [0.0, 0.0, 0.0],
+                        "std" : [255.0, 255.0, 255.0]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "NORMAL"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [8, 16, 32],
+                        "offset_scales": 1,
+                        "type" : "NORMAL"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5s-quant-416.json b/meta-template/yolov5s-quant-416.json
new file mode 100644 (file)
index 0000000..5e093b1
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "input_1",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [0.0, 0.0, 0.0],
+                        "std" : [255.0, 255.0, 255.0]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "NORMAL"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Identity",
+                "index" : [-1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [8, 16, 32],
+                        "offset_scales" : 1,
+                        "type" : "NORMAL"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5s.json b/meta-template/yolov5s.json
new file mode 100644 (file)
index 0000000..6d7d4d4
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "images",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [127.5, 127.5, 127.5],
+                        "std" : [127.5, 127.5, 127.5]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Conv_277",
+                "index" : [-1, -1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "SIGMOID"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Conv_277",
+                "index" : [-1, -1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [32],
+                        "offset_scales" : 3,
+                        "type" : "SIGMOID"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/meta-template/yolov5s_quantize.json b/meta-template/yolov5s_quantize.json
new file mode 100644 (file)
index 0000000..6d7d4d4
--- /dev/null
@@ -0,0 +1,61 @@
+{
+    "inputmetadata" :
+    {
+        "tensor_info" : [
+            {
+                "name" : "images",
+                "shape_type" : "NHWC",
+                "shape_dims" : [ 1, 416, 416, 3],
+                "data_type" : "FLOAT32",
+                "color_space" : "RGB888"
+            }
+        ],
+        "preprocess" : [
+            {
+                "normalization" : [
+                    {
+                        "mean" : [127.5, 127.5, 127.5],
+                        "std" : [127.5, 127.5, 127.5]
+                    }
+                ]
+            }
+        ]
+    },
+    "outputmetadata" :
+    {
+        "score" : [
+            {
+                "name" : "Conv_277",
+                "index" : [-1, -1, -1, 1],
+                "top_number" : 5,
+                "threshold" : 0.4,
+                "score_type" : "SIGMOID"
+            }
+        ],
+        "box" : [
+            {
+                "name" : "Conv_277",
+                "index" : [-1, -1, -1, 1],
+                "box_type" : "ORIGIN_CENTER",
+                "box_order" : [0, 1, 2, 3],
+                "box_coordinate" : "RATIO",
+                "decoding_type" : "YOLO_ANCHOR",
+                "decoding_info" :
+                {
+                    "cell" :
+                    {
+                        "num_scales" : 3,
+                        "scales": [32],
+                        "offset_scales" : 3,
+                        "type" : "SIGMOID"
+                    },
+                    "nms" :
+                    {
+                        "mode": "STANDARD",
+                        "iou_threshold": 0.2
+                    }
+                }
+            }
+        ]
+    }
+}
index 5a478b3..d28444a 100644 (file)
@@ -56,6 +56,9 @@ namespace box
        struct CellParam {
                int numScales;
                std::vector<int> scales;
+               int offsetScales;
+               inference_score_type_e type;
+               std::map<std::string, inference_score_type_e> supportedCellType;
        };
 
        struct NMSParam {
@@ -95,6 +98,10 @@ namespace box
 
        public:
                DecodeInfo() {
+                       cellParam.type = INFERENCE_SCORE_TYPE_NORMAL;
+                       cellParam.supportedCellType.insert({"NORMAL", INFERENCE_SCORE_TYPE_NORMAL});
+                       cellParam.supportedCellType.insert({"SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID});
+
                        nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE;
                        nmsParam.iouThreshold = 0.2f;
                        nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD});
@@ -138,6 +145,8 @@ namespace box
                int ParseCellParam(JsonObject *root);
                std::vector<int>& GetCellScalesAll();
                int GetCellNumScales();
+               int GetCellOffsetScales();
+               inference_score_type_e GetCellType();
 
                // Nms param
                int ParseNms(JsonObject *root);
index 11c5cc2..9d725ee 100755 (executable)
@@ -49,7 +49,7 @@ namespace inference
                Boxes mResultBoxes;
 
                float decodeScore(int idx);
-               Box decodeBox(int idx, float score, int label = -1);
+               Box decodeBox(int idx, float score, int label = -1, int offset = 0);
                Box decodeBoxWithAnchor(int idx, int anchorIdx, float score, cv::Rect2f& anchor);
 
        public:
index f56d6c5..9d39be7 100755 (executable)
@@ -1263,7 +1263,7 @@ namespace inference
                                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                                }
                                boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
-                               numberOfObjects = boxOffset - 5;
+                               numberOfObjects = boxOffset / outputMeta.GetBoxDecodeInfo().GetCellNumScales() - 5;
                        }
 
                        ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
index 3c5d8bb..96193da 100755 (executable)
@@ -68,22 +68,30 @@ namespace inference
                return score < mMeta.GetScoreThreshold() ? 0.0f : score;
        }
 
-       Box ObjectDecoder::decodeBox(int idx, float score, int label)
+       Box ObjectDecoder::decodeBox(int idx, float score, int label, int offset)
        {
                // assume type is (cx,cy,w,h)
                // left or cx
                float cx = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
-                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[0]);
+                                                                       idx * mBoxOffset + offset + mMeta.GetBoxOrder()[0]);
                // top or cy
                float cy = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
-                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[1]);
+                                                                       idx * mBoxOffset + offset + mMeta.GetBoxOrder()[1]);
                // right or width
                float cWidth = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
-                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[2]);
+                                                                       idx * mBoxOffset + offset + mMeta.GetBoxOrder()[2]);
                // bottom or height
                float cHeight = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
-                                                                       idx * mBoxOffset + mMeta.GetBoxOrder()[3]);
+                                                                       idx * mBoxOffset + offset + mMeta.GetBoxOrder()[3]);
 
+               if (mMeta.GetBoxDecodeInfo().GetCellType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+                       cx = PostProcess::sigmoid(cx);
+                       cy = PostProcess::sigmoid(cy);
+                       cWidth = PostProcess::sigmoid(cWidth);
+                       cHeight = PostProcess::sigmoid(cHeight);
+               }
+
+               LOGI("cx:%.2f, cy:%.2f, cW:%.2f, cH:%.2f", cx, cy, cWidth, cHeight);
                // convert type to ORIGIN_CENTER if ORIGIN_LEFTTOP
                if (mMeta.GetBoxType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
                        float tmpCx = cx;
@@ -157,7 +165,8 @@ namespace inference
                        for (auto& scale : mMeta.GetBoxDecodeInfo().GetCellScalesAll()) {
                                totalIdx += (static_cast<int>(mScaleW) / scale
                                                        * static_cast<int>(mScaleH) / scale)
-                                                       * mMeta.GetBoxDecodeInfo().GetCellNumScales();
+                                                       * mMeta.GetBoxDecodeInfo().GetCellNumScales()
+                                                       / mMeta.GetBoxDecodeInfo().GetCellOffsetScales();
                        }
                        boxList.reserve(mNumberOfOjects);
                }
@@ -188,26 +197,29 @@ namespace inference
                                boxList.push_back(boxes);
                        } else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
                                int cellIdx = idx * mBoxOffset;
-                               float score = decodeScore(cellIdx + 4);
-                               if (score <= 0.0f) {
-                                       continue;
-                               }
-                               // need to check the score
-                               float objScore = 0.0f;
-                               int objIdx = 0;
-                               for (int objIdx_ = 0; objIdx_ < mNumberOfOjects; ++objIdx_) {
-                                       float objScore_ = decodeScore(cellIdx + 5 + objIdx_);
-                                       if (objScore_ > objScore) {
-                                               objScore = objScore_;
-                                               objIdx = objIdx_;
+                               for (int j = 0; j < mMeta.GetBoxDecodeInfo().GetCellOffsetScales(); ++j) {
+                                       float score = decodeScore(cellIdx + (mNumberOfOjects + 5) * j + 4);
+                                       if (score <= 0.0f) {
+                                               continue;
+                                       }
+                                       LOGI("score[%d]: %.2f", j, score);
+                                       // need to check the score
+                                       float topObjScore = 0.0f;
+                                       int topObjIdx = 0;
+                                       for (int objIdx_ = 0; objIdx_ < mNumberOfOjects; ++objIdx_) {
+                                               float objScore_ = decodeScore(cellIdx + (mNumberOfOjects + 5) * j + 5 + objIdx_);
+                                               if (objScore_ > topObjScore) {
+                                                       topObjScore = objScore_;
+                                                       topObjIdx = objIdx_;
+                                               }
                                        }
-                               }
 
-                               if (objScore <  mMeta.GetScoreThreshold())
-                                       continue;
+                                       if (topObjScore <  mMeta.GetScoreThreshold())
+                                               continue;
 
-                               Box box = decodeBox(idx, objScore, objIdx);
-                               boxes.push_back(box);
+                                       Box box = decodeBox(idx, topObjScore, topObjIdx, (mNumberOfOjects + 5) * j);
+                                       boxes.push_back(box);
+                               }
                        }
                }
 
index 24cd8b2..12a2da3 100755 (executable)
@@ -142,6 +142,15 @@ namespace inference
                        this->cellParam.scales.push_back(scale);
                        LOGI("scale: %d", scale);
                }
+
+               this->cellParam.offsetScales = static_cast<int>(json_object_get_int_member(object, "offset_scales"));
+               try {
+                       this->cellParam.type = GetSupportedType(object, "type", this->cellParam.supportedCellType);
+               } catch (const std::exception& e) {
+                       LOGE("Invalid %s", e.what());
+                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
+               }
+
                return MEDIA_VISION_ERROR_NONE;
        }
 
@@ -155,6 +164,16 @@ namespace inference
                return this->cellParam.numScales;
        }
 
+       int DecodeInfo::GetCellOffsetScales()
+       {
+               return this->cellParam.offsetScales;
+       }
+
+       inference_score_type_e DecodeInfo::GetCellType()
+       {
+               return this->cellParam.type;
+       }
+
        float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
        {
                return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
index 5faa3ad..144a003 100644 (file)
@@ -702,13 +702,17 @@ int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer,
        }
 
        for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
-               LOGE("names: %s", objectDetectionResults.names[output_idx].c_str());
+               LOGI("names: %s", objectDetectionResults.names[output_idx].c_str());
                names[output_idx] = objectDetectionResults.names[output_idx].c_str();
 
                locations[output_idx].point.x = objectDetectionResults.locations[output_idx].x;
                locations[output_idx].point.y = objectDetectionResults.locations[output_idx].y;
                locations[output_idx].width = objectDetectionResults.locations[output_idx].width;
                locations[output_idx].height = objectDetectionResults.locations[output_idx].height;
+               LOGI("%d, %d, %d, %d", locations[output_idx].point.x,
+                                                               locations[output_idx].point.y,
+                                                               locations[output_idx].width,
+                                                               locations[output_idx].height);
        }
 
        int *indices = objectDetectionResults.indices.data();
index d232162..8083326 100644 (file)
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.15.0
+Version:     0.15.1
 Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
index d1ace92..98925e9 100644 (file)
        MV_CONFIG_PATH \
        "/res/inference/images/dog2.jpg"
 
+#define OD_TFLITE_WEIGHT_YOLO_V5m_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5m-fp32-416.tflite"
+#define OD_SNPE_WEIGHT_YOLO_V5m_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5m-fp32-416.dlc"
+#define OD_SNPE_WEIGHT_QUANT_YOLO_V5m_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5m-quant-416.dlc"
+
+#define OD_TFLITE_WEIGHT_YOLO_V5s_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5s-fp32-416.tflite"
+#define OD_SNPE_WEIGHT_YOLO_V5s_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5s-fp32-416.dlc"
+#define OD_SNPE_WEIGHT_QUANT_YOLO_V5s_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5s-quant-416.dlc"
+
+#define OD_SNPE_WEIGHT_QC_YOLO_V5m_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5m.dlc"
+#define OD_SNPE_WEIGHT_QUANT_QC_YOLO_V5m_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5m_quantize.dlc"
+#define OD_SNPE_WEIGHT_QC_YOLO_V5s_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5s.dlc"
+#define OD_SNPE_WEIGHT_QUANT_QC_YOLO_V5s_416_PATH \
+       MV_CONFIG_PATH                      \
+       "/models/OD/snpe/yolov5s_quantize.dlc"
+
+#define OD_LABEL_YOLO_V5_416_PATH \
+       MV_CONFIG_PATH                \
+       "/models/OD/snpe/label_coco_80.txt"
+
 void _object_detected_cb(mv_source_h source, const int number_of_objects,
                                                 const int *indices, const char **names,
                                                 const float *confidences,
@@ -45,6 +82,22 @@ public:
        }
 };
 
+class TestObjectDetectionSnpe : public TestInference
+{
+public:
+       void inferenceDog()
+       {
+               TestInference::ConfigureInference();
+
+               ASSERT_EQ(MediaVision::Common::ImageHelper::loadImageToSource(
+                                                 IMG_DOG, mv_source),
+                                 MEDIA_VISION_ERROR_NONE);
+               ASSERT_EQ(mv_inference_object_detect(mv_source, infer,
+                                                                                        _object_detected_cb, NULL),
+                                 MEDIA_VISION_ERROR_NONE);
+       }
+};
+
 TEST_P(TestObjectDetectionTflite, MobilenetV1_SSD)
 {
        engine_config_hosted_tflite_model(engine_cfg, OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH,
@@ -80,9 +133,128 @@ TEST_P(TestObjectDetectionTflite, MobilenetV1_SSD)
        inferenceDog();
 }
 
+
+TEST_P(TestObjectDetectionSnpe, YoloV5mQC)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_QC_YOLO_V5m_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5mQC_Quantized)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_QUANT_QC_YOLO_V5m_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5sQC)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_QC_YOLO_V5s_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5sQC_Quantized)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_QUANT_QC_YOLO_V5s_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionTflite, YoloV5m)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_tflite_model(
+                       engine_cfg, OD_TFLITE_WEIGHT_YOLO_V5m_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5m)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_YOLO_V5m_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5m_Quantized)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_QUANT_YOLO_V5m_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionTflite, YoloV5s)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_tflite_model(
+                       engine_cfg, OD_TFLITE_WEIGHT_YOLO_V5s_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5s)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_YOLO_V5s_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
+TEST_P(TestObjectDetectionSnpe, YoloV5s_Quantized)
+{
+       ASSERT_TRUE(_use_json_parser);
+       engine_config_hosted_snpe_model(
+                       engine_cfg, OD_SNPE_WEIGHT_QUANT_YOLO_V5s_416_PATH,
+                       OD_LABEL_YOLO_V5_416_PATH,
+                       _use_json_parser, _target_device_type);
+
+       inferenceDog();
+}
+
 INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetectionTflite,
                                                ::testing::Values(
                                                        ParamTypes(false, MV_INFERENCE_TARGET_DEVICE_CPU),
                                                        ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_CPU)
                                                )
+);
+
+INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetectionSnpe,
+                                               ::testing::Values(
+                                                       ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_CPU),
+                                                       ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_GPU),
+                                                       ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_CUSTOM)
+                                               )
 );
\ No newline at end of file