mv_machine_learning: add Stdc1 model support for semantic segmentation 48/320248/2
authorInki Dae <inki.dae@samsung.com>
Tue, 25 Feb 2025 09:16:35 +0000 (18:16 +0900)
committerInki Dae <inki.dae@samsung.com>
Thu, 27 Feb 2025 06:26:03 +0000 (15:26 +0900)
Add Stdc1 model support for semantic segmentation task group.

This model is one of the best models for the semantic segmentation task,
so use it by default. In addition, this patch introduces a new native
API, mv_semantic_segmentation_get_result_count(), as the other task
APIs did.

Change-Id: I904ef124d334539e19b8a1db11a16ce4986c53ba
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_machine_learning/image_segmentation/include/Stdc1.h [new file with mode: 0644]
mv_machine_learning/image_segmentation/include/image_segmentation_type.h
mv_machine_learning/image_segmentation/meta/semantic_segmentation.json
mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp
mv_machine_learning/image_segmentation/src/Stdc1.cpp [new file with mode: 0644]
mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp

diff --git a/mv_machine_learning/image_segmentation/include/Stdc1.h b/mv_machine_learning/image_segmentation/include/Stdc1.h
new file mode 100644 (file)
index 0000000..681caf9
--- /dev/null
@@ -0,0 +1,51 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STDC1_H__
+#define __STDC1_H__
+
+#include "mv_private.h"
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "ImageSegmentation.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * @brief Image segmentation implementation for the Stdc1 model.
+ *
+ * Derives from ImageSegmentation<T> and overrides result() to produce an
+ * ImageSegmentationResult from the inference output.
+ *
+ * @tparam T Element type of the output tensor buffer.
+ */
+template<typename T> class Stdc1 : public ImageSegmentation<T>
+{
+       // Members of the dependent base class have to be named explicitly to be
+       // usable without this-> qualification inside the template.
+       using ImageSegmentation<T>::_config;
+       using ImageSegmentation<T>::_preprocess;
+       using ImageSegmentation<T>::_labels;
+
+private:
+       // Result object rebuilt and returned by reference on every result() call.
+       ImageSegmentationResult _result;
+
+public:
+       /**
+        * @brief Creates the model object with the given configuration.
+        *
+        * Declared explicit so that a std::shared_ptr<Config> is never converted
+        * to a Stdc1 object implicitly.
+        *
+        * @param config Shared configuration forwarded to ImageSegmentation<T>.
+        */
+       explicit Stdc1(std::shared_ptr<Config> config);
+       ~Stdc1();
+
+       /**
+        * @brief Builds and returns the segmentation result.
+        * @return Reference to the result object owned by this instance.
+        */
+       ImageSegmentationResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
index 82b16198fca0792de0d0a1d724ca1fe8f3d1c4a4..904a56ee61bc491daafd6a2eb513bae790bb538c 100644 (file)
@@ -47,7 +47,8 @@ struct ImageSegmentationResult : public OutputBaseType {
 enum class ImageSegmentationTaskType {
        IMAGE_SEGMENTATION_TASK_NONE = 0,
        SELFIE_SEGMENTATION,
-       DEEPLAB_V3_MOBILENET_V2
+       DEEPLAB_V3_MOBILENET_V2,
+       STDC1
        // TODO
 };
 
index d2020a2762a6082fdebcd9e429b2ded9cdde7815..42b246e6e3fe97c18d89b22cffb3c4449ff96f9c 100644 (file)
@@ -9,22 +9,22 @@
                {
             "name"  : "MODEL_FILE_NAME",
             "type"  : "string",
-            "value" : "deeplab_v3_mobilenet_v2.hef"
+            "value" : "stdc1.hef"
         },
         {
             "name"  : "DEFAULT_MODEL_NAME",
             "type"  : "string",
-            "value" : "DEEPLAB_V3_MOBILENET_V2"
+            "value" : "STDC1"
         },
         {
             "name"  : "MODEL_META_FILE_NAME",
             "type"  : "string",
-            "value" : "deeplab_v3_mobilenet_v2.json"
+            "value" : "stdc1.json"
         },
         {
             "name"  : "MODEL_LABEL_FILE_NAME",
             "type"  : "string",
-            "value" : "deeplab_v3_mobilenet_v2.txt"
+            "value" : "stdc1.txt"
         },
         {
             "name"  : "BACKEND_TYPE",
index d6abfe6d5e1b8a2a35e8b6f0ed14904bafc8a891..142a6e1dbe93a63e588506b67f7085d6b02692f5 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "SemanticSegmentationAdapter.h"
 #include "DeeplabV3.h"
+#include "Stdc1.h"
 #include "MvMlException.h"
 #include "mv_image_segmentation_config.h"
 
@@ -52,6 +53,9 @@ template<typename U> void SemanticSegmentationAdapter::create(ImageSegmentationT
        case ImageSegmentationTaskType::DEEPLAB_V3_MOBILENET_V2:
                _semantic_segmentation = make_unique<DeeplabV3<U> >(_config);
                break;
+       case ImageSegmentationTaskType::STDC1:
+               _semantic_segmentation = make_unique<Stdc1<U> >(_config);
+               break;
        default:
                throw InvalidOperation("Invalid semantic segmentation task type.");
        }
@@ -88,6 +92,8 @@ ImageSegmentationTaskType SemanticSegmentationAdapter::convertToTaskType(string
 
        if (model_name == "DEEPLAB_V3_MOBILENET_V2")
                return ImageSegmentationTaskType::DEEPLAB_V3_MOBILENET_V2;
+       else if (model_name == "STDC1")
+               return ImageSegmentationTaskType::STDC1;
 
        throw InvalidParameter("Invalid semantic segmentation model name.");
 }
diff --git a/mv_machine_learning/image_segmentation/src/Stdc1.cpp b/mv_machine_learning/image_segmentation/src/Stdc1.cpp
new file mode 100644 (file)
index 0000000..c322130
--- /dev/null
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <string.h>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "Stdc1.h"
+#include "MvMlException.h"
+#include "Postprocess.h"
+#include "mv_image_segmentation_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Forwards the shared configuration to the ImageSegmentation<T> base and
+// value-initializes the cached result object.
+template<typename T> Stdc1<T>::Stdc1(std::shared_ptr<Config> config) : ImageSegmentation<T>(config), _result()
+{
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+template<typename T> Stdc1<T>::~Stdc1() = default;
+
+template<typename T> ImageSegmentationResult &Stdc1<T>::result()
+{
+       // Clear _result object because result() function can be called every time user wants
+       // so make sure to clear existing result data before getting the data again.
+       _result = ImageSegmentationResult();
+
+       vector<string> names;
+       ImageSegmentation<T>::getOutputNames(names);
+       vector<T> outputTensor;
+       ImageSegmentation<T>::getOutputTensor(names[0], outputTensor);
+
+       auto &outputMetaMap = _config->getOutputMetaMap();
+       auto &metaInfo = outputMetaMap["stdc1/argmax1"];
+       auto height = metaInfo->dims[1];
+       auto width = metaInfo->dims[2];
+       auto ori_src_width = static_cast<double>(_preprocess.getImageWidth()[0]);
+       auto ori_src_height = static_cast<double>(_preprocess.getImageHeight()[0]);
+       auto input_tensor_width = static_cast<double>(width);
+       auto input_tensor_height = static_cast<double>(height);
+
+       // Calculate the ratio[A] between the original image size and the input tensor size.
+       auto width_ratio = ori_src_width / input_tensor_width;
+       auto height_ratio = ori_src_height / input_tensor_height;
+
+       _result.height = static_cast<int>(input_tensor_height * height_ratio);
+       _result.width = static_cast<int>(input_tensor_width * width_ratio);
+       _result.pixel_size = 1;
+       _result.labels = _labels;
+
+       cv::Mat cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 1), outputTensor.data());
+       cv::Mat cvDest(_result.height, _result.width, CV_8UC1);
+
+       cv::resize(cvSource, cvDest, cv::Size(_result.width, _result.height), 0, 0, cv::INTER_NEAREST);
+
+       for (unsigned int h = 0; h < _result.height; ++h)
+               for (unsigned int w = 0; w < _result.width; ++w)
+                       _result.data.push_back(*cvDest.ptr<unsigned char>(h, w));
+
+       return _result;
+}
+
+// Explicit instantiation: the member definitions live in this source file, so the
+// unsigned char variant is generated here.
+template class Stdc1<unsigned char>;
+}
+}
index cdda347f06d2fb8806b9354c27f23b3d56dd84a6..1269e6850b1757f686bde21c485a08ef6e48e2f1 100644 (file)
@@ -303,6 +303,24 @@ int mv_semantic_segmentation_inference_async(mv_semantic_segmentation_h handle,
+/*
+ * Reports the frame number and the result count of the latest semantic
+ * segmentation result.
+ *
+ * handle       - semantic segmentation handle.
+ * frame_number - output; receives frame_number of the fetched result.
+ * result_cnt   - output; receives 1 if the result carries data, otherwise 0.
+ *
+ * Returns MEDIA_VISION_ERROR_NONE on success. If a BaseException is caught, it is
+ * logged and its getError() value is returned. The support/instance check macros
+ * presumably return early with their own error code - confirm in mv_private.h.
+ */
 int mv_semantic_segmentation_get_result_count(mv_semantic_segmentation_h handle, unsigned long *frame_number,
                                                                                 unsigned int *result_cnt)
 {
+       // Feature support and argument validation come first, before any work is done.
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(frame_number);
+       MEDIA_VISION_INSTANCE_CHECK(result_cnt);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               // Fetch the result of this task and view it as an image segmentation result.
+               auto &result = static_cast<ImageSegmentationResult &>(machine_learning_native_get_result(handle, TASK_NAME));
+
+               *frame_number = result.frame_number;
+               // The count is either 0 (no data) or 1 (data available), never more.
+               *result_cnt = result.data.size() ? 1 : 0;
+       } catch (const BaseException &e) {
+               // Exceptions are converted into an error code for the caller.
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
        return MEDIA_VISION_ERROR_NONE;
 }