--- /dev/null
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "mv_object_detection_config.h"
+#include "yolov5.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * @brief Constructs a YoloV5 object for the given task type.
+ *
+ * The task type is forwarded to the ObjectDetection base class and the
+ * cached result object is value-initialized.
+ *
+ * @param task_type task type handed to the ObjectDetection base class.
+ */
+YoloV5::YoloV5(ObjectDetectionTaskType task_type)
+		: ObjectDetection(task_type)
+		, _result()
+{
+}
+
+/**
+ * @brief Destructor. Performs no work of its own here; members and the base
+ *        class are torn down by their own destructors.
+ */
+YoloV5::~YoloV5() = default;
+
+/**
+ * @brief Ordering predicate that places higher-scoring boxes first.
+ *
+ * @param box0 left-hand box.
+ * @param box1 right-hand box.
+ * @return true when box0 has a strictly greater score than box1, so that
+ *         sorting with this predicate yields descending score order.
+ */
+static bool compareScore(const Box &box0, const Box &box1)
+{
+	const bool firstScoresHigher = box1.score < box0.score;
+
+	return firstScoresHigher;
+}
+
+/**
+ * @brief Computes the intersection-over-union (IoU) of two centre-format boxes.
+ *
+ * Each box is read as (centre x, centre y, width, height) from Box::location
+ * and is first clipped to the rectangle [0, input_width] x [0, input_height].
+ * The two areas and the intersection are all measured on those clipped
+ * rectangles. Measuring the areas on the unclipped boxes while clipping only
+ * the intersection under-estimates the IoU of boxes that extend past the
+ * frame, so identical boxes at the border would not be recognised as
+ * duplicates.
+ *
+ * @param box0         first box.
+ * @param box1         second box.
+ * @param input_width  right-hand clipping bound (left bound is 0).
+ * @param input_height bottom clipping bound (top bound is 0).
+ * @return intersection area divided by union area of the clipped boxes, or
+ *         0 when either clipped box has no positive area.
+ */
+static float calcIntersectionOverUnion(const Box &box0, const Box &box1, float input_width, float input_height)
+{
+	// Corner coordinates of both boxes, clipped to the input frame.
+	const float xmin0 = max(box0.location.x - box0.location.width * 0.5f, 0.0f);
+	const float ymin0 = max(box0.location.y - box0.location.height * 0.5f, 0.0f);
+	const float xmax0 = min(box0.location.x + box0.location.width * 0.5f, input_width);
+	const float ymax0 = min(box0.location.y + box0.location.height * 0.5f, input_height);
+	const float xmin1 = max(box1.location.x - box1.location.width * 0.5f, 0.0f);
+	const float ymin1 = max(box1.location.y - box1.location.height * 0.5f, 0.0f);
+	const float xmax1 = min(box1.location.x + box1.location.width * 0.5f, input_width);
+	const float ymax1 = min(box1.location.y + box1.location.height * 0.5f, input_height);
+
+	// Areas of the clipped rectangles; an inverted or fully clipped box collapses to 0.
+	const float area0 = max(xmax0 - xmin0, 0.0f) * max(ymax0 - ymin0, 0.0f);
+	const float area1 = max(xmax1 - xmin1, 0.0f) * max(ymax1 - ymin1, 0.0f);
+
+	// A box without area cannot overlap anything; returning here also keeps the divisor below positive.
+	if (area0 <= 0.0f || area1 <= 0.0f)
+		return 0.0f;
+
+	const float intersectXmin = max(xmin0, xmin1);
+	const float intersectYmin = max(ymin0, ymin1);
+	const float intersectXmax = min(xmax0, xmax1);
+	const float intersectYmax = min(ymax0, ymax1);
+
+	const float intersectArea = max(intersectYmax - intersectYmin, 0.0f) * max(intersectXmax - intersectXmin, 0.0f);
+
+	return intersectArea / (area0 + area1 - intersectArea);
+}
+
+/**
+ * @brief Decodes the first output tensor into the final detection result.
+ *
+ * Steps:
+ *  1. Reset the cached result.
+ *  2. Walk the output tensor record by record, keep the records whose
+ *     confidence exceeds the configured score threshold and pick the class
+ *     with the highest class score as label.
+ *  3. Sort the kept boxes by confidence (descending) and apply non-maximum
+ *     suppression with the configured IoU threshold.
+ *  4. Clip the surviving boxes to the model input size, scale them by the
+ *     source/input size ratio and store them in the result, stopping once the
+ *     number of stored objects equals the configured top number.
+ *
+ * @return reference to the member result object; it is overwritten by the next call.
+ * @throws std::out_of_range if there is no output name, no meta information
+ *         for the first output name, fewer than four entries in the box
+ *         order, or a label index outside the label list.
+ */
+ObjectDetectionResult &YoloV5::result()
+{
+	// result() can be called any number of times, so start from an empty result on every call.
+	// _result owns containers (see the push_back() calls below), so it has to be reset by
+	// assignment; zeroing its bytes with memset() would clobber the containers' internal state.
+	_result = ObjectDetectionResult();
+
+	vector<string> names;
+	ObjectDetection::getOutputNames(names);
+
+	for (auto &name : names)
+		LOGI("output name: %s", name.c_str());
+
+	// Only the first output is decoded. at() raises std::out_of_range for an empty list
+	// instead of reading past the end of the vector.
+	string &output_name = names.at(0);
+
+	vector<float> output_tensor;
+	ObjectDetection::getOutputTensor(output_name, output_tensor);
+
+	auto scoreMetaInfo = _parser->getOutputMetaMap().at(output_name);
+	auto decodingScore = static_pointer_cast<DecodingScore>(scoreMetaInfo->decodingTypeMap[DecodingType::SCORE]);
+	auto decodingBox = static_pointer_cast<DecodingBox>(scoreMetaInfo->decodingTypeMap[DecodingType::BOX]);
+	auto boxNmsParam = static_pointer_cast<BoxNmsParam>(decodingBox->decodingInfoMap[BoxDecodingType::NMS]);
+	vector<unsigned int> &order = decodingBox->order; // order of (bx, by, bw, bh)
+
+	// Positions of bx, by, bw, bh inside one record; looked up once, with bounds checking.
+	// NOTE(review): the values are assumed to be smaller than the record size - confirm against the meta file.
+	const unsigned int bx_pos = order.at(0);
+	const unsigned int by_pos = order.at(1);
+	const unsigned int bw_pos = order.at(2);
+	const unsigned int bh_pos = order.at(3);
+
+	const int input_width = _inference->getInputWidth();
+	const int input_height = _inference->getInputHeight();
+
+	const int source_width = _preprocess.getImageWidth()[0];
+	const int source_height = _preprocess.getImageHeight()[0];
+
+	// Factors that map coordinates in model-input space back to the source image.
+	const float w_ratio = source_width / static_cast<float>(input_width);
+	const float h_ratio = source_height / static_cast<float>(input_height);
+
+	/*
+		The output tensor is read as a flat sequence of records, one per predicted box.
+		Each record has (5 + number of labels) values :
+		(bx, by, bw, bh, confidence, ...one class score per label...)
+		(bx, by) = absolute centre coordinates of the box
+		(bw, bh) = absolute width and height of the box
+		The positions of bx, by, bw, bh come from 'order', confidence is at position 4
+		and the class scores start at position 5.
+	*/
+	const size_t confidence_pos = 4;
+	const size_t class_start = 5;
+	const size_t box_offset = class_start + _labels.size();
+
+	vector<Box> boxes;
+
+	// 'idx + box_offset <= size' makes sure only complete records are read, even if the
+	// tensor length is not a multiple of the record size.
+	for (size_t idx = 0; idx + box_offset <= output_tensor.size(); idx += box_offset) {
+		const float confidence = output_tensor[idx + confidence_pos];
+		if (confidence <= decodingScore->threshold)
+			continue;
+
+		const float bx = output_tensor[idx + bx_pos];
+		const float by = output_tensor[idx + by_pos];
+		const float bw = output_tensor[idx + bw_pos];
+		const float bh = output_tensor[idx + bh_pos];
+
+		// The label is the class with the highest class score; it stays 0 when no class score is above 0.
+		float best_class_score = 0;
+		int label = 0;
+		for (size_t k = class_start; k < box_offset; k++) {
+			const float class_score = output_tensor[idx + k];
+			if (class_score > best_class_score) {
+				label = static_cast<int>(k - class_start);
+				best_class_score = class_score;
+			}
+		}
+
+		// The box score is the record's confidence value, not the class score.
+		Box box = { .index = label, .score = confidence, .location = cv::Rect2f(bx, by, bw, bh) }; // x, y, w, h
+		boxes.push_back(box);
+	}
+
+	// Sort boxes by confidence in descending order so that NMS keeps the strongest box of each cluster.
+	sort(boxes.begin(), boxes.end(), compareScore);
+
+	// Non Maximal Suppression (NMS), class agnostic : every box is compared against all boxes
+	// kept so far, regardless of their labels, and dropped if it overlaps any of them too much.
+	vector<Box> candidate_box_vec;
+
+	for (auto &decoded_box : boxes) {
+		bool isIgnore = false;
+
+		for (auto candidate_box = candidate_box_vec.rbegin(); candidate_box != candidate_box_vec.rend();
+			 ++candidate_box) {
+			const float iouValue = calcIntersectionOverUnion(decoded_box, (*candidate_box), input_width, input_height);
+
+			if (iouValue >= boxNmsParam->iouThreshold) {
+				isIgnore = true;
+				break;
+			}
+		}
+
+		if (!isIgnore)
+			candidate_box_vec.push_back(decoded_box);
+	}
+
+	for (auto &box : candidate_box_vec) {
+		const int idx = _result.number_of_objects++;
+		_result.indices.push_back(idx);
+		_result.confidences.push_back(box.score);
+		// at() raises std::out_of_range instead of reading out of bounds, e.g. when the label list is empty.
+		_result.names.push_back(_labels.at(box.index));
+
+		// Convert centre/size to corners, clipped to the model input size.
+		const float left = max(box.location.x - box.location.width * 0.5f, 0.0f);
+		const float top = max(box.location.y - box.location.height * 0.5f, 0.0f);
+		const float right = min(box.location.x + box.location.width * 0.5f, static_cast<float>(input_width));
+		const float bottom = min(box.location.y + box.location.height * 0.5f, static_cast<float>(input_height));
+
+		// Scale from model-input coordinates to source-image coordinates.
+		_result.left.push_back(static_cast<int>(left * w_ratio));
+		_result.top.push_back(static_cast<int>(top * h_ratio));
+		_result.right.push_back(static_cast<int>(right * w_ratio));
+		_result.bottom.push_back(static_cast<int>(bottom * h_ratio));
+
+		LOGI("idx = %d, name = %s, score = %f, %dx%d, %dx%d", idx, _result.names[idx].c_str(), _result.confidences[idx],
+			 _result.left[idx], _result.top[idx], _result.right[idx], _result.bottom[idx]);
+
+		// Stop as soon as the configured number of objects has been stored.
+		if (decodingScore->topNumber == _result.number_of_objects)
+			break;
+	}
+
+	LOGI("LEAVE");
+
+	return _result;
+}
+
+}
+}