mv_machine_learning: add Hailo NPU Runtime backend support 63/319063/2
author: Inki Dae <inki.dae@samsung.com>
Tue, 4 Feb 2025 01:57:57 +0000 (10:57 +0900)
committer: Inki Dae <inki.dae@samsung.com>
Tue, 4 Feb 2025 08:12:28 +0000 (17:12 +0900)
Add Hailo-8 series NPU engine backend support.

As the first step in supporting Hailo NPU models, this patch adds YOLOv10s
model support for the Hailo-8L NPU device.

Change-Id: I7ce5bca8e60f0fb06d0b1dd9466dbd994a907fbb
Signed-off-by: Inki Dae <inki.dae@samsung.com>
include/mv_inference_type.h
mv_machine_learning/common/include/common.h
mv_machine_learning/inference/include/Inference.h
mv_machine_learning/inference/src/Inference.cpp
mv_machine_learning/inference/src/TensorBuffer.cpp
mv_machine_learning/object_detection/include/HailoYoloXs.h [new file with mode: 0644]
mv_machine_learning/object_detection/include/ObjectDetectionAdapter.h
mv_machine_learning/object_detection/include/object_detection_type.h
mv_machine_learning/object_detection/src/HailoYoloXs.cpp [new file with mode: 0644]
mv_machine_learning/object_detection/src/ObjectDetection.cpp
mv_machine_learning/object_detection/src/ObjectDetectionAdapter.cpp

index b337ee96ed0c4f1d6061bdbb20480306c636444b..a31559a6353abfcda26a7fb8b10251d768d96f1f 100644 (file)
@@ -69,6 +69,7 @@ typedef enum {
        MV_INFERENCE_BACKEND_ONE, /**< On-device Neural Engine (Since 6.0) */
        MV_INFERENCE_BACKEND_NNTRAINER, /**< NNTrainer (Since 7.0) */
        MV_INFERENCE_BACKEND_SNPE, /**< SNPE Engine (Since 7.0) */
+       MV_INFERENCE_BACKEND_HAILORT, /**< Hailo Runtime Engine (Since 10.0) */
        MV_INFERENCE_BACKEND_MAX /**< @deprecated Backend MAX (Deprecated since 7.0) */
 } mv_inference_backend_type_e;
 
index 350ea31ea2fabe2ee9d3f5940d710aff729b85e5..e19b50ed7353f0e5958b602cff20316a0cf34de2 100644 (file)
@@ -31,7 +31,8 @@ namespace common
 static std::map<std::string, int> gBackendTypeTable = {
        { "OPENCV", MV_INFERENCE_BACKEND_OPENCV },               { "TFLITE", MV_INFERENCE_BACKEND_TFLITE },
        { "ARMNN", MV_INFERENCE_BACKEND_ARMNN },                 { "ONE", MV_INFERENCE_BACKEND_ONE },
-       { "NNTRAINER", MV_INFERENCE_BACKEND_NNTRAINER }, { "SNPE", MV_INFERENCE_BACKEND_SNPE }
+       { "NNTRAINER", MV_INFERENCE_BACKEND_NNTRAINER }, { "SNPE", MV_INFERENCE_BACKEND_SNPE },
+       { "HAILORT", MV_INFERENCE_BACKEND_HAILORT }
 };
 
 // TODO: mv_inference_target_device_e will be deprecated in tizen 11.0
index 269f8b037457a66f202b1c89774aaa5685b54ae2..50b31a4d7e478b137867ea7763b4a5e0fa3221b9 100644 (file)
@@ -358,7 +358,7 @@ private:
                { MV_INFERENCE_BACKEND_OPENCV, { "opencv", false } }, { MV_INFERENCE_BACKEND_TFLITE, { "tflite", false } },
                { MV_INFERENCE_BACKEND_ARMNN, { "armnn", false } },       { MV_INFERENCE_BACKEND_MLAPI, { "mlapi", false } },
                { MV_INFERENCE_BACKEND_ONE, { "mlapi", false } },         { MV_INFERENCE_BACKEND_NNTRAINER, { "mlapi", false } },
-               { MV_INFERENCE_BACKEND_SNPE, { "mlapi", false } },
+               { MV_INFERENCE_BACKEND_SNPE, { "mlapi", false } },        { MV_INFERENCE_BACKEND_HAILORT, { "hailort", false } }
        };
        cv::Size mInputSize;
        cv::Size mSourceSize;
@@ -371,6 +371,7 @@ private:
                { "weights", INFERENCE_MODEL_DARKNET },  { "bin", INFERENCE_MODEL_DLDT },
                { "onnx", INFERENCE_MODEL_ONNX },                { "nb", INFERENCE_MODEL_VIVANTE },
                { "ini", INFERENCE_MODEL_NNTRAINER },    { "dlc", INFERENCE_MODEL_SNPE },
+               { "hef", INFERENCE_MODEL_HAILORT }
        };
        std::vector<std::string> mUserListName;
        TensorBuffer mInputTensorBuffers;
index 8f131afdc430d42a8740e515eca86653fe073e85..7829e67fa8e605d74ac10bb035b62a504f090249 100644 (file)
@@ -756,7 +756,7 @@ int Inference::bind(int backend_type, int device_type)
                .backend_type = backend_type,
                // As a default, Target device is CPU. If user defined desired device type in json file
                // then the device type will be set by Load callback.
-               .target_devices = device_type,
+               .target_devices = device_type
        };
 
        // Create a backend class object.
@@ -792,7 +792,7 @@ int Inference::bind(int backend_type, int device_type)
                return ret;
        }
 
-       if (!isTargetDeviceSupported(mConfig.mTargetTypes)) {
+       if (!isTargetDeviceSupported(config.target_devices)) {
                mBackend->UnbindBackend();
                LOGE("Tried to configure invalid target types.");
                return MEDIA_VISION_ERROR_NOT_SUPPORTED;
@@ -849,6 +849,7 @@ int Inference::load(void)
        case INFERENCE_MODEL_TORCH:
        case INFERENCE_MODEL_NNTRAINER:
        case INFERENCE_MODEL_SNPE:
+       case INFERENCE_MODEL_HAILORT:
                models.push_back(mConfig.mWeightFilePath);
                break;
        default:
index afb0b4059c2b28b9f5014fae00c49d50454baef3..accf0939de8efd3871890cf73a876f86ed7ded0b 100644 (file)
@@ -65,7 +65,8 @@ int TensorBuffer::allocate(inference_engine_tensor_buffer &tensor_buffer,
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
        }
 
-       tensor_buffer.size = tensor_info.size * getSizeOf(tensor_info.data_type);
+       tensor_buffer.size = tensor_info.is_size_fixed ? tensor_info.size :
+                                                                                                        tensor_info.size * getSizeOf(tensor_info.data_type);
        tensor_buffer.owner_is_backend = 0;
        tensor_buffer.data_type = tensor_info.data_type;
 
diff --git a/mv_machine_learning/object_detection/include/HailoYoloXs.h b/mv_machine_learning/object_detection/include/HailoYoloXs.h
new file mode 100644 (file)
index 0000000..b285d45
--- /dev/null
@@ -0,0 +1,51 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HAILO_YOLO_X_S_H__
+#define __HAILO_YOLO_X_S_H__
+
+#include "mv_private.h"
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "ObjectDetection.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
// Object detection task implementation for YOLO-family models running on
// Hailo NPU devices through the HailoRT inference backend.
// T is the element type of the model's input/output tensors (instantiated
// for float and unsigned char in HailoYoloXs.cpp).
template<typename T> class HailoYoloXs : public ObjectDetection<T>
{
	using ObjectDetection<T>::_preprocess;
	using ObjectDetection<T>::_labels;
	using ObjectDetection<T>::_inference;

private:
	// Cached decoded result; rebuilt from scratch on every result() call.
	ObjectDetectionResult _result;

public:
	HailoYoloXs(ObjectDetectionTaskType task_type, std::shared_ptr<Config> config);
	~HailoYoloXs();

	// Decode the backend's post-NMS output tensor into bounding boxes,
	// class indices and confidences scaled back to the source image size.
	ObjectDetectionResult &result() override;
};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
index 6a43f996415e151e51d05804944c885defe2637b..ad162ff549ccc89af34da3a9771a7365c168a503 100644 (file)
@@ -20,6 +20,7 @@
 #include <dlog.h>
 
 #include "EngineConfig.h"
+#include "HailoYoloXs.h"
 #include "ITask.h"
 #include "MobilenetV1Ssd.h"
 #include "MobilenetV2Ssd.h"
index 358dfef87104a59cc5b754309719a8d232987d5d..27de7eeefd70a77a521dd64f01e70a64f7a1c0f4 100644 (file)
@@ -54,7 +54,8 @@ enum class ObjectDetectionTaskType {
        FD_MOBILENET_V1_SSD,
        OD_TRIV2,
        FD_TRIV2,
-       HD_PALM
+       HD_PALM,
+       HAILO8_YOLOXS
        // TODO
 };
 
diff --git a/mv_machine_learning/object_detection/src/HailoYoloXs.cpp b/mv_machine_learning/object_detection/src/HailoYoloXs.cpp
new file mode 100644 (file)
index 0000000..09f2e85
--- /dev/null
@@ -0,0 +1,107 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <map>
+#include <string.h>
+
+#include "HailoYoloXs.h"
+#include "MvMlException.h"
+#include "Postprocess.h"
+#include "mv_object_detection_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T>
+HailoYoloXs<T>::HailoYoloXs(ObjectDetectionTaskType task_type, std::shared_ptr<Config> config)
+               : ObjectDetection<T>(task_type, config), _result()
+{}
+
+template<typename T> HailoYoloXs<T>::~HailoYoloXs()
+{}
+
+template<typename T> ObjectDetectionResult &HailoYoloXs<T>::result()
+{
+       // Clear _result object because result() function can be called every time user wants
+       // so make sure to clear existing result data before getting the data again.
+       _result = ObjectDetectionResult();
+
+       vector<string> names;
+
+       ObjectDetection<T>::getOutputNames(names);
+
+       for (auto &name : names)
+               LOGD("output tensor name : %s", name.c_str());
+
+       vector<float> output_tensor;
+
+       // output layer name is yolov10s/yolov8_nms_postprocess
+       ObjectDetection<T>::getOutputTensor(names[0], output_tensor);
+
+       auto ori_src_width = static_cast<float>(_preprocess.getImageWidth()[0]);
+       auto ori_src_height = static_cast<float>(_preprocess.getImageHeight()[0]);
+       auto input_tensor_width = static_cast<float>(_inference->getInputWidth());
+       auto input_tensor_height = static_cast<float>(_inference->getInputHeight());
+
+       // Calculate the ratio[A] between the original image size and the input tensor size.
+       auto width_ratio = ori_src_width / input_tensor_width;
+       auto height_ratio = ori_src_height / input_tensor_height;
+
+       for (size_t tensor_idx = 0; tensor_idx < output_tensor.size(); ++tensor_idx) {
+               float num_of_classes;
+               unsigned int class_id;
+
+               num_of_classes = output_tensor[tensor_idx];
+               class_id = tensor_idx;
+               if (num_of_classes <= 0.0f)
+                       continue;
+
+               for (unsigned int class_idx = 0; class_idx < num_of_classes; ++class_idx) {
+                       float left, top, right, bottom, confidence;
+
+                       // x = [width A] * width of input tensor * width ratio value of output tensor.
+                       // y = [height A] * height of input tensor * height ratio value of output tensor.
+                       top = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
+                       left = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
+                       bottom = height_ratio * input_tensor_height * output_tensor[++tensor_idx];
+                       right = width_ratio * input_tensor_width * output_tensor[++tensor_idx];
+                       confidence = output_tensor[++tensor_idx];
+
+                       _result.top.push_back(static_cast<int>(top));
+                       _result.left.push_back(static_cast<int>(left));
+                       _result.bottom.push_back(static_cast<int>(bottom));
+                       _result.right.push_back(static_cast<int>(right));
+                       _result.confidences.push_back(confidence);
+               }
+
+               _result.number_of_objects += num_of_classes;
+               _result.indices.push_back(class_id);
+       }
+
+       return _result;
+}
+
// Explicit instantiations for the tensor element types this backend is
// used with (float and unsigned char).
template class HailoYoloXs<float>;
template class HailoYoloXs<unsigned char>;
+
+}
+}
index 50c7a416d0926c45e3eba3cb6a22b3b1d97ffeac..ab1187e37378439d46da94350172f9e7faef6b81 100644 (file)
@@ -236,19 +236,23 @@ template<typename T> void ObjectDetection<T>::configurePreprocess()
                                                                metaInfo->getWidth(),
                                                                metaInfo->getHeight() };
 
-       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
-       if (normalization) {
-               config.normalize = normalization->use;
-               config.mean = normalization->mean;
-               config.std = normalization->std;
+       if (metaInfo->decodingTypeMap.find(DecodingType::NORMAL) != metaInfo->decodingTypeMap.end()) {
+               auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+               if (normalization) {
+                       config.normalize = normalization->use;
+                       config.mean = normalization->mean;
+                       config.std = normalization->std;
+               }
        }
 
-       auto quantization =
-                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
-       if (quantization) {
-               config.quantize = quantization->use;
-               config.scale = quantization->scale;
-               config.zeropoint = quantization->zeropoint;
+       if (metaInfo->decodingTypeMap.find(DecodingType::QUANTIZATION) != metaInfo->decodingTypeMap.end()) {
+               auto quantization =
+                               static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+               if (quantization) {
+                       config.quantize = quantization->use;
+                       config.scale = quantization->scale;
+                       config.zeropoint = quantization->zeropoint;
+               }
        }
 
        _preprocess.setConfig(config);
index 6e685b296c22c458fa810a048df4483ecd0c6dd8..526254708ab5d9f181457bf510f2e0c555b05f84 100644 (file)
@@ -55,6 +55,9 @@ template<typename U> void ObjectDetectionAdapter::create(ObjectDetectionTaskType
        case ObjectDetectionTaskType::MOBILENET_V2_SSD:
                _object_detection = make_unique<MobilenetV2Ssd<U> >(task_type, _config);
                break;
+       case ObjectDetectionTaskType::HAILO8_YOLOXS:
+               _object_detection = make_unique<HailoYoloXs<U> >(task_type, _config);
+               break;
        default:
                throw InvalidOperation("Invalid object detection task type.");
        }
@@ -102,6 +105,8 @@ ObjectDetectionTaskType ObjectDetectionAdapter::convertToTaskType(string model_n
                return ObjectDetectionTaskType::MOBILENET_V1_SSD;
        else if (model_name == "MOBILENET_V2_SSD")
                return ObjectDetectionTaskType::MOBILENET_V2_SSD;
+       else if (model_name == "HAILO8_YOLOXS")
+               return ObjectDetectionTaskType::HAILO8_YOLOXS;
        // TODO.
 
        throw InvalidParameter("Invalid object detection model name.");