backends/mediavision: add semantic segmentation task API support

author Inki Dae <inki.dae@samsung.com>

Tue, 25 Feb 2025 09:28:12 +0000 (18:28 +0900)

committer Inki Dae <inki.dae@samsung.com>

Wed, 26 Feb 2025 00:49:50 +0000 (09:49 +0900)
author Inki Dae <inki.dae@samsung.com>
Tue, 25 Feb 2025 09:28:12 +0000 (18:28 +0900)
committer Inki Dae <inki.dae@samsung.com>
Wed, 26 Feb 2025 00:49:50 +0000 (09:49 +0900)
diff --git a/common/include/SingleoCommonTypes.h b/common/include/SingleoCommonTypes.h

index e077884be3ed5f59a23e39b96054a4457a56dc31..e6240f6f48c313bbbee739da677a4bb0cbdb45d3 100644 (file)
--- a/common/include/SingleoCommonTypes.h
+++ b/common/include/SingleoCommonTypes.h
@@ -172,7 +172,8 @@ enum class ResultType {
         HAND_LANDMARK,
         IMAGE_CLASSIFICATION,
         FACE_RECOGNITION,
-       GAZE_TRACKING
+       GAZE_TRACKING,
+       SEMANTIC_SEGMENTATION
  };
  
  struct ServiceBaseResultType {
@@ -289,6 +290,22 @@ struct GazeResultType : public BaseResultType {
         }
  };
  
+/**
+ * Inference result of the semantic segmentation task, tagged with
+ * ResultType::SEMANTIC_SEGMENTATION.
+ */
+struct SsResultType : public BaseResultType {
+       SsResultType() : BaseResultType(ResultType::SEMANTIC_SEGMENTATION)
+       {}
+       // Width and height of the segmentation map; presumably in pixels - TODO confirm.
+       unsigned int width {};
+       unsigned int height {};
+       // NOTE(review): unit is not visible here (bytes per pixel?) - confirm against the backend API.
+       unsigned int pixel_size {};
+       // NOTE(review): element type is char rather than a string type; confirm
+       // the intended label representation.
+       std::vector<char> _labels;
+       // Raw pointer to the segmentation map. This struct never frees it, and
+       // copying the struct copies the pointer value only, not the pixels.
+       // NOTE(review): presumably points into backend-owned memory - confirm
+       // how long it stays valid.
+       const unsigned char *_segment_map {};
+
+       // Returns a shared_ptr to a member-wise copy of this result. The copy
+       // holds the same _segment_map pointer value as the original.
+       std::shared_ptr<BaseResultType> clone() override
+       {
+               return std::make_shared<SsResultType>(*this);
+       }
+};
+
+
  enum class InputFeedType { NONE, CAMERA, SCREEN_CAPTURE };
  
  enum class CameraBackendType { NONE, OPENCV, CAMERA_API, VISION_SOURCE };
diff --git a/inference/backends/mediavision/CMakeLists.txt b/inference/backends/mediavision/CMakeLists.txt

index c4b00bcf4638ffa28f380c58533ab08cbdc3d3b6..0b8e09cb4a33cf84cea5b9ec564c222f28aac2d8 100644 (file)
--- a/inference/backends/mediavision/CMakeLists.txt
+++ b/inference/backends/mediavision/CMakeLists.txt
@@ -16,8 +16,9 @@ SET(SINGLEO_SERVICE_SOURCE_FILES
      ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvImageClassification.cpp
      ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceRecognition.cpp
      ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvGazeTracking.cpp
-       ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceTaskFactory.cpp
+    ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvSemanticSegmentation.cpp
+    ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceTaskFactory.cpp
  )
  
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition mv_gaze_tracking)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition mv_gaze_tracking mv_image_segmentation)
  LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
diff --git a/inference/backends/mediavision/include/MvInferenceTaskFactory.h b/inference/backends/mediavision/include/MvInferenceTaskFactory.h

index c705b2076e8bc90b22dae26f4f875c85714c88d5..60fd3e8c5b7f321c903c9c05269c43322b396ba4 100644 (file)
--- a/inference/backends/mediavision/include/MvInferenceTaskFactory.h
+++ b/inference/backends/mediavision/include/MvInferenceTaskFactory.h
@@ -40,6 +40,7 @@ public:
         std::unique_ptr<IInferenceTaskInterface> createHandDetection() override;
         std::unique_ptr<IInferenceTaskInterface> createHandLandmark() override;
         std::unique_ptr<IInferenceTaskInterface> createGazeTracking() override;
+       std::unique_ptr<IInferenceTaskInterface> createSemanticSegmentation() override;
  };
  
  }
diff --git a/inference/backends/mediavision/include/MvSemanticSegmentation.h b/inference/backends/mediavision/include/MvSemanticSegmentation.h

new file mode 100644 (file)

index 0000000..c2ec973
--- /dev/null
+++ b/inference/backends/mediavision/include/MvSemanticSegmentation.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_SEMANTIC_SEGMENTATION_H__
+#define __MV_SEMANTIC_SEGMENTATION_H__
+
+#include "IInferenceTaskInterface.h"
+#include "mv_semantic_segmentation_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Semantic segmentation inference task implemented on top of the
+ * mediavision mv_semantic_segmentation API.
+ *
+ * The constructor creates a native handle and the destructor destroys it,
+ * so the object is the sole owner of that handle and must not be copied.
+ */
+class MvSemanticSegmentation : public IInferenceTaskInterface
+{
+private:
+       // Native handle created in the constructor and destroyed in the destructor.
+       mv_semantic_segmentation_h _handle {};
+       // Storage for the result handed out by reference from result().
+       SsResultType _output_data {};
+
+public:
+       MvSemanticSegmentation();
+       virtual ~MvSemanticSegmentation();
+
+       // Copying would leave two objects destroying the same native handle.
+       MvSemanticSegmentation(const MvSemanticSegmentation &) = delete;
+       MvSemanticSegmentation &operator=(const MvSemanticSegmentation &) = delete;
+
+       // Configures the native handle; throws std::runtime_error on failure.
+       void configure() override;
+       // Prepares the native handle; throws std::runtime_error on failure.
+       void prepare() override;
+       // Runs inference on an image input; 'async' is not used by this backend.
+       void invoke(BaseDataType &input, bool async) override;
+       // Returns a reference to the internally stored result of the last query.
+       BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
diff --git a/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp b/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp

index 172ed97b00961f2ebefb5b003b79a4921af74f64..d83c9cb886179a2847a07cbb55e1607f9e982701 100644 (file)
--- a/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp
+++ b/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp
@@ -24,6 +24,7 @@
  #include "MvHandDetection.h"
  #include "MvHandLandmark.h"
  #include "MvGazeTracking.h"
+#include "MvSemanticSegmentation.h"
  #include "SingleoLog.h"
  #include "SingleoException.h"
  
@@ -77,5 +78,10 @@ std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createGazeTrack
  {
         return make_unique<MvGazeTracking>();
  }
+
+/**
+ * Creates a new MvSemanticSegmentation task and returns it through the
+ * generic inference task interface.
+ */
+std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createSemanticSegmentation()
+{
+       std::unique_ptr<IInferenceTaskInterface> task = make_unique<MvSemanticSegmentation>();
+
+       return task;
+}
  }
  }
diff --git a/inference/backends/mediavision/src/MvSemanticSegmentation.cpp b/inference/backends/mediavision/src/MvSemanticSegmentation.cpp

new file mode 100644 (file)

index 0000000..a293bc9
--- /dev/null
+++ b/inference/backends/mediavision/src/MvSemanticSegmentation.cpp
@@ -0,0 +1,118 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "MvSemanticSegmentation.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Creates the native semantic segmentation handle.
+ *
+ * @throws std::runtime_error if mv_semantic_segmentation_create() does not
+ *         return MEDIA_VISION_ERROR_NONE.
+ */
+MvSemanticSegmentation::MvSemanticSegmentation()
+{
+       const int status = mv_semantic_segmentation_create(&_handle);
+
+       if (status == MEDIA_VISION_ERROR_NONE)
+               return;
+
+       throw runtime_error("Fail to create semantic segmentation handle.");
+}
+
+/**
+ * Destroys the native semantic segmentation handle.
+ *
+ * A failure is only logged: nothing is allowed to propagate out of the
+ * destructor, so every std::exception is caught here rather than only
+ * std::runtime_error.
+ */
+MvSemanticSegmentation::~MvSemanticSegmentation()
+{
+       try {
+               int ret = mv_semantic_segmentation_destroy(_handle);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       SINGLEO_LOGE("Fail to destroy semantic segmentation handle.(%d)", ret);
+       } catch (const std::exception &e) {
+               SINGLEO_LOGE("Failed to destroy semantic segmentation handle: %s", e.what());
+       }
+}
+
+/**
+ * Configures the native semantic segmentation handle.
+ *
+ * @throws std::runtime_error if mv_semantic_segmentation_configure() does
+ *         not return MEDIA_VISION_ERROR_NONE.
+ */
+void MvSemanticSegmentation::configure()
+{
+       if (mv_semantic_segmentation_configure(_handle) != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to configure semantic segmentation.");
+}
+
+/**
+ * Prepares the native semantic segmentation handle.
+ *
+ * @throws std::runtime_error if mv_semantic_segmentation_prepare() does not
+ *         return MEDIA_VISION_ERROR_NONE.
+ */
+void MvSemanticSegmentation::prepare()
+{
+       const int status = mv_semantic_segmentation_prepare(_handle);
+       const bool prepared = (status == MEDIA_VISION_ERROR_NONE);
+
+       if (!prepared)
+               throw runtime_error("Fail to prepare semantic segmentation.");
+}
+
+/**
+ * Runs semantic segmentation on one image.
+ *
+ * The input buffer is wrapped in a temporary mv_source_h, handed to
+ * mv_semantic_segmentation_inference(), and the source is destroyed again
+ * before this function returns or throws.
+ *
+ * @param input  Must be an ImageDataType whose _data_type is DataType::IMAGE.
+ * @param async  Not used by this implementation.
+ *
+ * @throws std::bad_cast          if input is not an ImageDataType.
+ * @throws std::invalid_argument  if the data type is not DataType::IMAGE.
+ * @throws std::runtime_error     if creating, filling or destroying the
+ *                                source fails, or if the inference fails.
+ */
+void MvSemanticSegmentation::invoke(BaseDataType &input, bool async)
+{
+       ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+       if (data._data_type != DataType::IMAGE) {
+               SINGLEO_LOGE("Invalid input type.");
+               throw invalid_argument("Input type not support.");
+       }
+
+       mv_source_h mv_src {};
+
+       int ret = mv_create_source(&mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to create mv source.");
+
+       try {
+               // NOTE(review): the colorspace is fixed to RGB888 while the buffer
+               // size is derived from data.byte_per_pixel - confirm callers always
+               // pass 3-byte RGB images.
+               ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel,
+                                              data.width, data.height, MEDIA_VISION_COLORSPACE_RGB888);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to convert to mv source.");
+
+               ret = mv_semantic_segmentation_inference(_handle, mv_src);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to invoke semantic segmentation.");
+       } catch (const std::runtime_error &e) {
+               SINGLEO_LOGE("%s", e.what());
+
+               // Release the source before propagating. A failure here is only
+               // logged so that the original error is the one the caller sees.
+               if (mv_destroy_source(mv_src) != MEDIA_VISION_ERROR_NONE)
+                       SINGLEO_LOGE("Fail to destroy mv source.");
+
+               // Previously the error was swallowed and the caller saw success.
+               throw;
+       }
+
+       ret = mv_destroy_source(mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to destroy mv source.");
+}
+
+/**
+ * Fetches the latest semantic segmentation result from the native handle.
+ *
+ * The internally stored result is reset on every call. When the backend
+ * reports at least one result, its width, height, pixel size and segment
+ * map pointer are queried once and stored.
+ *
+ * @return Reference to the member holding the result; it is overwritten by
+ *         the next call to result().
+ *
+ * @throws std::runtime_error if the result count or the result itself
+ *         cannot be obtained.
+ */
+BaseResultType &MvSemanticSegmentation::result()
+{
+       unsigned long frame_number {};
+       unsigned int result_cnt {};
+
+       int ret = mv_semantic_segmentation_get_result_count(_handle, &frame_number, &result_cnt);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to get semantic segmentation result count.");
+
+       _output_data = SsResultType();
+       _output_data._is_empty = result_cnt == 0;
+       _output_data._frame_number = frame_number;
+
+       // The getter takes no result index, so one call is enough; the former
+       // loop repeated the identical call result_cnt times.
+       if (result_cnt > 0) {
+               // NOTE(review): _segment_map is a raw pointer filled in by the
+               // backend - confirm how long the pointed-to buffer stays valid.
+               ret = mv_semantic_segmentation_get_result(_handle, &_output_data.width, &_output_data.height,
+                                                         &_output_data.pixel_size, &_output_data._segment_map);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to get semantic segmentation result.");
+       }
+
+       return _output_data;
+}
+
+}
+}
+}
diff --git a/inference/include/IInferenceTaskFactory.h b/inference/include/IInferenceTaskFactory.h

index e6054cb35d45a72a4e54bfcbfa33b56471157451..f40b65a8c0e106243633c44ee4fa6c1fcca3482f 100644 (file)
--- a/inference/include/IInferenceTaskFactory.h
+++ b/inference/include/IInferenceTaskFactory.h
@@ -38,6 +38,7 @@ public:
         virtual std::unique_ptr<IInferenceTaskInterface> createHandDetection() = 0;
         virtual std::unique_ptr<IInferenceTaskInterface> createHandLandmark() = 0;
         virtual std::unique_ptr<IInferenceTaskInterface> createGazeTracking() = 0;
+       virtual std::unique_ptr<IInferenceTaskInterface> createSemanticSegmentation() = 0;
  };
  
  }
diff --git a/services/task_api/include/DataTypes.h b/services/task_api/include/DataTypes.h

index 235898f02e95bfe902863c7b75f211b2006d65b7..3afcfca0a8ac99636790a5785a904137a306679a 100644 (file)
--- a/services/task_api/include/DataTypes.h
+++ b/services/task_api/include/DataTypes.h
@@ -35,11 +35,22 @@ struct ObjectDetectionResult {
         std::vector<Rect> bboxes;
         std::vector<std::string> labels;
  };
+
+// Semantic segmentation output as exposed through TaskApiResult.
+struct SemanticSegmentationResult {
+       // Defaults to false. NOTE(review): presumably set to true once the
+       // fields below are filled - confirm against the code that writes it.
+       bool is_valid { false };
+       // Width and height of the segmentation map; presumably in pixels - TODO confirm.
+       unsigned int width {};
+       unsigned int height {};
+       // NOTE(review): unit is not visible here (bytes per pixel?) - confirm.
+       unsigned int pixel_size {};
+       // Segmentation map bytes, owned by this struct.
+       std::vector<unsigned char> data;
+       // NOTE(review): presumably the class label names - confirm how they map onto 'data'.
+       std::vector<std::string> labels;
+};
+
  struct TaskApiResult : public ServiceBaseResultType {
         TaskApiResult() : ServiceBaseResultType(ServiceType::TASK_API)
         {}
         unsigned int frame_number {};
         ObjectDetectionResult object_detection_result;
+       // Semantic segmentation output, carried alongside the object detection result.
+       SemanticSegmentationResult semantic_segmentation_result;
  };
  
  }
author	Inki Dae <inki.dae@samsung.com>
	Tue, 25 Feb 2025 09:28:12 +0000 (18:28 +0900)
committer	Inki Dae <inki.dae@samsung.com>
	Wed, 26 Feb 2025 00:49:50 +0000 (09:49 +0900)
common/include/SingleoCommonTypes.h		patch \| blob \| history
inference/backends/mediavision/CMakeLists.txt		patch \| blob \| history
inference/backends/mediavision/include/MvInferenceTaskFactory.h		patch \| blob \| history
inference/backends/mediavision/include/MvSemanticSegmentation.h	[new file with mode: 0644]	patch \| blob
inference/backends/mediavision/src/MvInferenceTaskFactory.cpp		patch \| blob \| history
inference/backends/mediavision/src/MvSemanticSegmentation.cpp	[new file with mode: 0644]	patch \| blob
inference/include/IInferenceTaskFactory.h		patch \| blob \| history
services/task_api/include/DataTypes.h		patch \| blob \| history