mv_machine_learning: add Stdc1 model support for semantic segmentation

author Inki Dae <inki.dae@samsung.com>

Tue, 25 Feb 2025 09:16:35 +0000 (18:16 +0900)

committer Inki Dae <inki.dae@samsung.com>

Thu, 27 Feb 2025 06:26:03 +0000 (15:26 +0900)
author Inki Dae <inki.dae@samsung.com>
Tue, 25 Feb 2025 09:16:35 +0000 (18:16 +0900)
committer Inki Dae <inki.dae@samsung.com>
Thu, 27 Feb 2025 06:26:03 +0000 (15:26 +0900)
diff --git a/mv_machine_learning/image_segmentation/include/Stdc1.h b/mv_machine_learning/image_segmentation/include/Stdc1.h

new file mode 100644 (file)

index 0000000..681caf9
--- /dev/null
+++ b/mv_machine_learning/image_segmentation/include/Stdc1.h
@@ -0,0 +1,51 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STDC1_H__
+#define __STDC1_H__
+
+#include "mv_private.h"
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "ImageSegmentation.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * Semantic segmentation model class for STDC1, derived from ImageSegmentation<T>.
+ *
+ * T is forwarded to the ImageSegmentation<T> base class and is the element type
+ * of the output tensor buffer read in result().
+ */
+template<typename T> class Stdc1 : public ImageSegmentation<T>
+{
+       // Bring the dependent base-class members into scope under their unqualified names.
+       using ImageSegmentation<T>::_config;
+       using ImageSegmentation<T>::_preprocess;
+       using ImageSegmentation<T>::_labels;
+
+private:
+       // Result object that result() rebuilds and returns by reference.
+       ImageSegmentationResult _result;
+
+public:
+       // Constructs the model from a Config object held through a shared_ptr.
+       Stdc1(std::shared_ptr<Config> config);
+       ~Stdc1();
+
+       // Overrides the base-class accessor and returns a reference to _result.
+       ImageSegmentationResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
+\ No newline at end of file
diff --git a/mv_machine_learning/image_segmentation/include/image_segmentation_type.h b/mv_machine_learning/image_segmentation/include/image_segmentation_type.h

index 82b16198fca0792de0d0a1d724ca1fe8f3d1c4a4..904a56ee61bc491daafd6a2eb513bae790bb538c 100644 (file)
--- a/mv_machine_learning/image_segmentation/include/image_segmentation_type.h
+++ b/mv_machine_learning/image_segmentation/include/image_segmentation_type.h
@@ -47,7 +47,8 @@ struct ImageSegmentationResult : public OutputBaseType {
  enum class ImageSegmentationTaskType {
         IMAGE_SEGMENTATION_TASK_NONE = 0,
         SELFIE_SEGMENTATION,
-       DEEPLAB_V3_MOBILENET_V2
+       DEEPLAB_V3_MOBILENET_V2,
+       STDC1 // selects the Stdc1 model class in SemanticSegmentationAdapter::create()
         // TODO
  };
  
diff --git a/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json b/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json

index d2020a2762a6082fdebcd9e429b2ded9cdde7815..42b246e6e3fe97c18d89b22cffb3c4449ff96f9c 100644 (file)
--- a/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json
+++ b/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json
@@ -9,22 +9,22 @@
                 {
              "name"  : "MODEL_FILE_NAME",
              "type"  : "string",
-            "value" : "deeplab_v3_mobilenet_v2.hef"
+            "value" : "stdc1.hef"
          },
          {
              "name"  : "DEFAULT_MODEL_NAME",
              "type"  : "string",
-            "value" : "DEEPLAB_V3_MOBILENET_V2"
+            "value" : "STDC1"
          },
          {
              "name"  : "MODEL_META_FILE_NAME",
              "type"  : "string",
-            "value" : "deeplab_v3_mobilenet_v2.json"
+            "value" : "stdc1.json"
          },
          {
              "name"  : "MODEL_LABEL_FILE_NAME",
              "type"  : "string",
-            "value" : "deeplab_v3_mobilenet_v2.txt"
+            "value" : "stdc1.txt"
          },
          {
              "name"  : "BACKEND_TYPE",
diff --git a/mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp b/mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp

index d6abfe6d5e1b8a2a35e8b6f0ed14904bafc8a891..142a6e1dbe93a63e588506b67f7085d6b02692f5 100644 (file)
--- a/mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp
+++ b/mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp
@@ -16,6 +16,7 @@
  
  #include "SemanticSegmentationAdapter.h"
  #include "DeeplabV3.h"
+#include "Stdc1.h"
  #include "MvMlException.h"
  #include "mv_image_segmentation_config.h"
  
@@ -52,6 +53,9 @@ template<typename U> void SemanticSegmentationAdapter::create(ImageSegmentationT
         case ImageSegmentationTaskType::DEEPLAB_V3_MOBILENET_V2:
                 _semantic_segmentation = make_unique<DeeplabV3<U> >(_config);
                 break;
+       case ImageSegmentationTaskType::STDC1:
+               _semantic_segmentation = make_unique<Stdc1<U> >(_config);
+               break;
         default:
                 throw InvalidOperation("Invalid semantic segmentation task type.");
         }
@@ -88,6 +92,8 @@ ImageSegmentationTaskType SemanticSegmentationAdapter::convertToTaskType(string
  
         if (model_name == "DEEPLAB_V3_MOBILENET_V2")
                 return ImageSegmentationTaskType::DEEPLAB_V3_MOBILENET_V2;
+       else if (model_name == "STDC1")
+               return ImageSegmentationTaskType::STDC1;
  
         throw InvalidParameter("Invalid semantic segmentation model name.");
  }
diff --git a/mv_machine_learning/image_segmentation/src/Stdc1.cpp b/mv_machine_learning/image_segmentation/src/Stdc1.cpp

new file mode 100644 (file)

index 0000000..c322130
--- /dev/null
+++ b/mv_machine_learning/image_segmentation/src/Stdc1.cpp
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <string.h>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "Stdc1.h"
+#include "MvMlException.h"
+#include "Postprocess.h"
+#include "mv_image_segmentation_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * Creates an Stdc1 model object: the configuration is handed to the
+ * ImageSegmentation<T> base class and the cached result is value-initialized.
+ */
+template<typename T>
+Stdc1<T>::Stdc1(std::shared_ptr<Config> config) : ImageSegmentation<T>(config), _result()
+{
+}
+
+/** Destructor with an empty body: no explicit cleanup is performed here. */
+template<typename T>
+Stdc1<T>::~Stdc1()
+{
+}
+
+/**
+ * Builds the segmentation result from the first output tensor of the model.
+ *
+ * The single-channel 8-bit output map (presumably per-pixel class indices, given
+ * the "stdc1/argmax1" layer name - confirm against the model meta file) is resized
+ * with nearest-neighbour interpolation to the size of the original source image
+ * and copied, row by row, into the cached result together with the label list.
+ *
+ * @return Reference to the cached result object; it is rebuilt on every call.
+ * @throws InvalidOperation if no output tensor or no meta information for
+ *         "stdc1/argmax1" is available, if the tensor is smaller than the size
+ *         given by the meta information, or if the source image size is empty.
+ */
+template<typename T> ImageSegmentationResult &Stdc1<T>::result()
+{
+       // The tensor buffer is handed to OpenCV as CV_8UC1 below, so T has to be one byte wide.
+       static_assert(sizeof(T) == sizeof(unsigned char), "Stdc1 expects a one byte per element output tensor");
+
+       // result() can be called any number of times, so drop the data of a previous
+       // call before the result is filled in again.
+       _result = ImageSegmentationResult();
+
+       vector<string> names;
+       ImageSegmentation<T>::getOutputNames(names);
+       if (names.empty())
+               throw InvalidOperation("No output tensor is available.");
+
+       vector<T> outputTensor;
+       ImageSegmentation<T>::getOutputTensor(names[0], outputTensor);
+
+       auto &outputMetaMap = _config->getOutputMetaMap();
+       auto &metaInfo = outputMetaMap["stdc1/argmax1"];
+       if (!metaInfo)
+               throw InvalidOperation("Meta information of stdc1/argmax1 is missing.");
+
+       auto height = metaInfo->dims[1];
+       auto width = metaInfo->dims[2];
+
+       // cv::Mat below only wraps the buffer, so make sure it really holds width x height elements.
+       if (width < 1 || height < 1 || outputTensor.size() < static_cast<size_t>(width) * static_cast<size_t>(height))
+               throw InvalidOperation("Output tensor does not match its meta information.");
+
+       // The map is scaled back to the size of the original source image. The source
+       // size is used directly: computing tensor_size * (source_size / tensor_size) in
+       // floating point can end up just below the integer value and lose one pixel
+       // when it is truncated.
+       _result.width = static_cast<int>(_preprocess.getImageWidth()[0]);
+       _result.height = static_cast<int>(_preprocess.getImageHeight()[0]);
+       if (_result.width < 1 || _result.height < 1)
+               throw InvalidOperation("Source image size is invalid.");
+
+       _result.pixel_size = 1;
+       _result.labels = _labels;
+
+       cv::Mat cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 1), outputTensor.data());
+       cv::Mat cvDest(_result.height, _result.width, CV_8UC1);
+
+       // Nearest-neighbour keeps the values of the source map intact instead of blending them.
+       cv::resize(cvSource, cvDest, cv::Size(_result.width, _result.height), 0, 0, cv::INTER_NEAREST);
+
+       // Copy whole rows at once; going through ptr(row) stays correct even if rows are padded.
+       _result.data.reserve(static_cast<size_t>(cvDest.rows) * static_cast<size_t>(cvDest.cols));
+       for (int row = 0; row < cvDest.rows; ++row) {
+               const unsigned char *line = cvDest.ptr<unsigned char>(row);
+               _result.data.insert(_result.data.end(), line, line + cvDest.cols);
+       }
+
+       return _result;
+}
+
+template class Stdc1<unsigned char>;
+}
+}
diff --git a/mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp b/mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp

index cdda347f06d2fb8806b9354c27f23b3d56dd84a6..1269e6850b1757f686bde21c485a08ef6e48e2f1 100644 (file)
--- a/mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp
+++ b/mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp
@@ -303,6 +303,24 @@ int mv_semantic_segmentation_inference_async(mv_semantic_segmentation_h handle,
  int mv_semantic_segmentation_get_result_count(mv_semantic_segmentation_h handle, unsigned long *frame_number,
                                                                                  unsigned int *result_cnt)
  {
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(frame_number);
+       MEDIA_VISION_INSTANCE_CHECK(result_cnt);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               auto &result = static_cast<ImageSegmentationResult &>(machine_learning_native_get_result(handle, TASK_NAME));
+
+               *frame_number = result.frame_number;
+               *result_cnt = result.data.size() ? 1 : 0;
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
         return MEDIA_VISION_ERROR_NONE;
  }
author	Inki Dae <inki.dae@samsung.com>
	Tue, 25 Feb 2025 09:16:35 +0000 (18:16 +0900)
committer	Inki Dae <inki.dae@samsung.com>
	Thu, 27 Feb 2025 06:26:03 +0000 (15:26 +0900)
mv_machine_learning/image_segmentation/include/Stdc1.h	[new file with mode: 0644]	patch \| blob
mv_machine_learning/image_segmentation/include/image_segmentation_type.h		patch \| blob \| history
mv_machine_learning/image_segmentation/meta/semantic_segmentation.json		patch \| blob \| history
mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp		patch \| blob \| history
mv_machine_learning/image_segmentation/src/Stdc1.cpp	[new file with mode: 0644]	patch \| blob
mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp		patch \| blob \| history