mv_machine_learning: add semantic segmentation Task API 47/320247/1
authorInki Dae <inki.dae@samsung.com>
Mon, 17 Feb 2025 08:09:43 +0000 (17:09 +0900)
committerInki Dae <inki.dae@samsung.com>
Tue, 25 Feb 2025 00:30:36 +0000 (09:30 +0900)
Add semantic segmentation Task API. As initial support,
this patch adds DeeplabV3_mobilenetv2 model support.

Change-Id: Ie17eda56bfde678737e872c0bee5903922c51e93
Signed-off-by: Inki Dae <inki.dae@samsung.com>
17 files changed:
CMakeLists.txt
include/mv_semantic_segmentation_internal.h [new file with mode: 0644]
include/mv_semantic_segmentation_type.h [new file with mode: 0644]
mv_machine_learning/image_segmentation/CMakeLists.txt
mv_machine_learning/image_segmentation/include/DeeplabV3.h [new file with mode: 0644]
mv_machine_learning/image_segmentation/include/ImageSegmentation.h
mv_machine_learning/image_segmentation/include/SemanticSegmentationAdapter.h [new file with mode: 0644]
mv_machine_learning/image_segmentation/include/image_segmentation_type.h
mv_machine_learning/image_segmentation/meta/semantic_segmentation.json [new file with mode: 0644]
mv_machine_learning/image_segmentation/meta/semantic_segmentation_plugin.json [new file with mode: 0644]
mv_machine_learning/image_segmentation/src/DeeplabV3.cpp [new file with mode: 0644]
mv_machine_learning/image_segmentation/src/ImageSegmentation.cpp
mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp [new file with mode: 0644]
mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp [new file with mode: 0644]
packaging/capi-media-vision.spec
test/testsuites/machine_learning/image_segmentation/CMakeLists.txt
test/testsuites/machine_learning/image_segmentation/test_semantic_segmentation.cpp [new file with mode: 0644]

index f6bb1e6f582178571821ad24c6e1169ea2268297..e5298e66d10cdede8d70962c2dd547e13638f416 100644 (file)
@@ -280,6 +280,8 @@ if (${ENABLE_ML_IMAGE_SEGMENTATION})
     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${fw_name}-image-segmentation.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
     install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/image_segmentation/meta/selfie_segmentation.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
        install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/image_segmentation/meta/selfie_segmentation_plugin.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+    install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
+       install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mv_machine_learning/image_segmentation/meta/semantic_segmentation_plugin.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name})
        list(APPEND TOTAL_REQUIRED ${PC_NAME})
        list(APPEND TOTAL_LDFLAGS ${PC_LDFLAGS})
 endif()
diff --git a/include/mv_semantic_segmentation_internal.h b/include/mv_semantic_segmentation_internal.h
new file mode 100644 (file)
index 0000000..a190f28
--- /dev/null
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_SEMANTIC_SEGMENTATION_INTERNAL_H__
+#define __TIZEN_MEDIAVISION_SEMANTIC_SEGMENTATION_INTERNAL_H__
+
+#include <mv_common.h>
+#include <mv_semantic_segmentation_type.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_semantic_segmentation_internal.h
+ * @internal
+ * @brief  This file contains the Inference based Media Vision API.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_INFERENCE_MODULE
+ * @{
+ */
+
+/**
+ * @internal
+ * @brief Creates an inference handle for semantic segmentation object.
+ * @details Use this function to create an inference handle. After the creation
+ *          the semantic segmentation task has to be prepared with
+ *          mv_semantic_segmentation_prepare() function to prepare a network
+ *          for the inference.
+ *
+ * @since_tizen 10.0
+ *
+ * @remarks The @a handle should be released using mv_semantic_segmentation_destroy().
+ *
+ * @param[out] handle    The handle to the inference to be created.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal Error
+ *
+ * @see mv_semantic_segmentation_destroy()
+ * @see mv_semantic_segmentation_prepare()
+ */
+int mv_semantic_segmentation_create(mv_semantic_segmentation_h *handle);
+
+/**
+ * @internal
+ * @brief Destroys inference handle and releases all its resources.
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle    The handle to the inference to be destroyed.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create inference handle by using mv_semantic_segmentation_create()
+ *
+ * @see mv_semantic_segmentation_create()
+ */
+int mv_semantic_segmentation_destroy(mv_semantic_segmentation_h handle);
+
+/**
+ * @internal
+ * @brief Set user-given model information.
+ * @details Use this function to change the model information instead of default one after calling @ref mv_semantic_segmentation_create().
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle        The handle to the semantic segmentation object.
+ * @param[in] model_file    Model file name.
+ * @param[in] meta_file     Model meta file name.
+ * @param[in] label_file    Label file name.
+ * @param[in] model_name    Model name.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a semantic segmentation handle by calling @ref mv_semantic_segmentation_create()
+ */
+int mv_semantic_segmentation_set_model(mv_semantic_segmentation_h handle, const char *model_file, const char *meta_file,
+                                                                        const char *label_file, const char *model_name);
+
+/**
+ * @internal
+ * @brief Configures the backend for the semantic segmentation inference.
+ *
+ * @since_tizen 10.0
+ *
+ * @param [in] handle         The handle to the inference
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ */
+int mv_semantic_segmentation_configure(mv_semantic_segmentation_h handle);
+
+/**
+ * @internal
+ * @brief Prepares the semantic segmentation inference
+ * @details Use this function to prepare the semantic segmentation inference based on
+ *          the configured network.
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle         The handle to the inference.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Not supported format
+ */
+int mv_semantic_segmentation_prepare(mv_semantic_segmentation_h handle);
+
+/**
+ * @internal
+ * @brief Performs the semantic segmentation inference on the @a source.
+ *
+ * @since_tizen 10.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] handle         The handle to the inference
+ * @param[in] source         The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_semantic_segmentation_create()
+ * @pre Configure an inference by calling mv_semantic_segmentation_configure()
+ * @pre Prepare an inference by calling mv_semantic_segmentation_prepare()
+ */
+int mv_semantic_segmentation_inference(mv_semantic_segmentation_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Performs asynchronously the semantic segmentation inference on the @a source.
+ *
+ * @since_tizen 10.0
+ * @remarks This function operates asynchronously, so it returns immediately upon invocation.
+ *          The inference results are inserted into the outgoing queue within the framework
+ *          in the order of processing, and the results can be obtained through mv_semantic_segmentation_get_result()
+ *          and mv_semantic_segmentation_get_label().
+ *
+ * @param[in] handle         The handle to the inference
+ * @param[in] source         The handle to the source of the media
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_semantic_segmentation_create()
+ * @pre Configure an inference by calling mv_semantic_segmentation_configure()
+ * @pre Prepare an inference by calling mv_semantic_segmentation_prepare()
+ */
+int mv_semantic_segmentation_inference_async(mv_semantic_segmentation_h handle, mv_source_h source);
+
+/**
+ * @internal
+ * @brief Gets the frame number and the number of semantic segmentation inference results on the @a handle.
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle          The handle to the inference
+ * @param[out] frame_number   A frame number inferenced.
+ * @param[out] result_cnt     A number of results.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_semantic_segmentation_create()
+ * @pre Prepare an inference by calling mv_semantic_segmentation_configure()
+ * @pre Prepare an inference by calling mv_semantic_segmentation_prepare()
+ * @pre Request an inference by calling mv_semantic_segmentation_inference()
+ */
+int mv_semantic_segmentation_get_result_count(mv_semantic_segmentation_h handle, unsigned long *frame_number,
+                                                                                unsigned int *result_cnt);
+
+/**
+ * @internal
+ * @brief Gets the semantic segmentation inference result on the @a handle.
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle        The handle to the inference
+ * @param[out] width        Width size of output image.
+ * @param[out] height       Height size of output image.
+ * @param[out] pixel_size   Pixel size of output image in bytes.
+ * @param[out] data         A pointer to output image data.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_semantic_segmentation_create()
+ * @pre Configure an inference by calling mv_semantic_segmentation_configure()
+ * @pre Prepare an inference by calling mv_semantic_segmentation_prepare()
+ * @pre Request an inference by calling mv_semantic_segmentation_inference()
+ */
+int mv_semantic_segmentation_get_result(mv_semantic_segmentation_h handle, unsigned int *width, unsigned int *height,
+                                                                         unsigned int *pixel_size, const unsigned char **data);
+
+/**
+ * @internal
+ * @brief Set user-given inference engine and device types for inference.
+ * @details Use this function to change the inference engine and device types for inference instead of default ones after calling @ref mv_semantic_segmentation_create().
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle        The handle to the semantic segmentation object.
+ * @param[in] engine_type  A string of inference engine type.
+ * @param[in] device_type   A string of device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a semantic segmentation handle by calling @ref mv_semantic_segmentation_create()
+ */
+int mv_semantic_segmentation_set_engine(mv_semantic_segmentation_h handle, const char *engine_type,
+                                                                         const char *device_type);
+
+/**
+ * @internal
+ * @brief Get a number of inference engines available for semantic segmentation task API.
+ * @details Use this function to get how many inference engines are supported for semantic segmentation after calling @ref mv_semantic_segmentation_create().
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle         The handle to the semantic segmentation object.
+ * @param[out] engine_count  A number of inference engines available for semantic segmentation API.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a semantic segmentation handle by calling @ref mv_semantic_segmentation_create()
+ */
+int mv_semantic_segmentation_get_engine_count(mv_semantic_segmentation_h handle, unsigned int *engine_count);
+
+/**
+ * @internal
+ * @brief Get engine type to a given inference engine index.
+ * @details Use this function to get inference engine type with a given engine index after calling @ref mv_semantic_segmentation_get_engine_count().
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle        The handle to the semantic segmentation object.
+ * @param[in] engine_index  A inference engine index for getting the inference engine type.
+ * @param[out] engine_type  A string to inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Get a number of inference engines available for semantic segmentation task API by calling @ref mv_semantic_segmentation_get_engine_count()
+ */
+int mv_semantic_segmentation_get_engine_type(mv_semantic_segmentation_h handle, const unsigned int engine_index,
+                                                                                  char **engine_type);
+
+/**
+ * @internal
+ * @brief Get a number of device types available to a given inference engine.
+ * @details Use this function to get how many device types are supported for a given inference engine after calling @ref mv_semantic_segmentation_create().
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle         The handle to the semantic segmentation object.
+ * @param[in] engine_type    A inference engine string.
+ * @param[out] device_count  A number of device types available for a given inference engine.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a semantic segmentation handle by calling @ref mv_semantic_segmentation_create()
+ */
+int mv_semantic_segmentation_get_device_count(mv_semantic_segmentation_h handle, const char *engine_type,
+                                                                                       unsigned int *device_count);
+
+/**
+ * @internal
+ * @brief Get device type list available.
+ * @details Use this function to get what device types are supported for current inference engine type after calling @ref mv_semantic_segmentation_configure().
+ *
+ * @since_tizen 10.0
+ *
+ * @param[in] handle         The handle to the semantic segmentation object.
+ * @param[in] engine_type    A inference engine string.
+ * @param[in] device_index   A device index for getting the device type.
+ * @param[out] device_type   A string to device type.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Create a semantic segmentation handle by calling @ref mv_semantic_segmentation_create()
+ * @pre Configure semantic segmentation task by calling @ref mv_semantic_segmentation_configure()
+ */
+int mv_semantic_segmentation_get_device_type(mv_semantic_segmentation_h handle, const char *engine_type,
+                                                                                  const unsigned int device_index, char **device_type);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_SEMANTIC_SEGMENTATION_INTERNAL_H__ */
diff --git a/include/mv_semantic_segmentation_type.h b/include/mv_semantic_segmentation_type.h
new file mode 100644 (file)
index 0000000..0e4f9c5
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_MV_SEMANTIC_SEGMENTATION_TYPE_H__
+#define __TIZEN_MEDIAVISION_MV_SEMANTIC_SEGMENTATION_TYPE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file   mv_semantic_segmentation_type.h
+ * @brief  This file contains the semantic segmentation handle for Mediavision.
+ */
+
+/**
+ * @addtogroup CAPI_MEDIA_VISION_SELFIE_SEGMENTATION_MODULE
+ * @{
+ */
+
+/**
+ * @brief The semantic segmentation object handle.
+ *
+ * @since_tizen 10.0
+ */
+typedef void *mv_semantic_segmentation_h;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_MV_SEMANTIC_SEGMENTATION_TYPE_H__ */
index 3ccb5f71f5a2b5a058ccaf164a646b0ed6d1cf85..abebad9d8c6e04dce09a89d39bf2047ee6b0d21d 100644 (file)
@@ -19,6 +19,8 @@ install(
        FILES_MATCHING
        PATTERN "mv_selfie_segmentation_internal.h"
        PATTERN "mv_selfie_segmentation_type.h"
+       PATTERN "mv_semantic_segmentation_internal.h"
+       PATTERN "mv_semantic_segmentation_type.h"
        )
 install(
        DIRECTORY ${PROJECT_SOURCE_DIR}/include/ DESTINATION include/media
diff --git a/mv_machine_learning/image_segmentation/include/DeeplabV3.h b/mv_machine_learning/image_segmentation/include/DeeplabV3.h
new file mode 100644 (file)
index 0000000..a9a0666
--- /dev/null
@@ -0,0 +1,51 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DEEPLAB_V3_H__
+#define __DEEPLAB_V3_H__
+
+#include "mv_private.h"
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "ImageSegmentation.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * @brief DeeplabV3 model support built on top of ImageSegmentation<T>.
+ *
+ * @tparam T Element type forwarded to the ImageSegmentation<T> base class.
+ */
+template<typename T> class DeeplabV3 : public ImageSegmentation<T>
+{
+public:
+       DeeplabV3(std::shared_ptr<Config> config);
+       ~DeeplabV3();
+
+       // Implements the result() member declared by the base class.
+       ImageSegmentationResult &result() override;
+
+private:
+       // Make the members of the dependent base class usable without qualification.
+       using ImageSegmentation<T>::_config;
+       using ImageSegmentation<T>::_preprocess;
+       using ImageSegmentation<T>::_labels;
+
+       // Result object handed out by reference from result().
+       ImageSegmentationResult _result;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
index 3dfb7b6a4c3d09f7737dc7167a7be71d6d700507..2b8fc53a6b44a30a0bacb10fd914425a33bb28f6 100644 (file)
@@ -67,7 +67,7 @@ protected:
        Preprocess _preprocess;
 
        void getOutputNames(std::vector<std::string> &names);
-       void getOutputTensor(std::string target_name, std::vector<float> &tensor);
+       void getOutputTensor(std::string target_name, std::vector<T> &tensor);
        void inference(std::vector<std::vector<T> > &inputVectors);
        virtual ImageSegmentationResult &result() = 0;
 
diff --git a/mv_machine_learning/image_segmentation/include/SemanticSegmentationAdapter.h b/mv_machine_learning/image_segmentation/include/SemanticSegmentationAdapter.h
new file mode 100644 (file)
index 0000000..ba8a8ac
--- /dev/null
@@ -0,0 +1,65 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SEMANTIC_SEGMENTATION_ADAPTER_H__
+#define __SEMANTIC_SEGMENTATION_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "IImageSegmentation.h"
+#include "ITask.h"
+#include "ImageSegmentation.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class SemanticSegmentationAdapter : public mediavision::common::ITask
+{
+private:
+       std::unique_ptr<IImageSegmentation> _semantic_segmentation;
+       std::shared_ptr<Config> _config;
+       const std::string _config_file_name = "semantic_segmentation.json";
+       const std::string _plugin_config_file_name = "semantic_segmentation_plugin.json";
+
+       void create(const std::string &model_name = "");
+       template<typename U> void create(ImageSegmentationTaskType task_type);
+       ImageSegmentationTaskType convertToTaskType(std::string model_name);
+
+public:
+       SemanticSegmentationAdapter();
+       ~SemanticSegmentationAdapter();
+
+       void setModelInfo(const std::string &model_file, const std::string &meta_file, const std::string &label_file,
+                                         const std::string &model_name) override;
+       void setEngineInfo(const std::string &engine_type, const std::string &device_type) override;
+       void configure() override;
+       unsigned int getNumberOfEngines() override;
+       const std::string &getEngineType(unsigned int engine_index) override;
+       unsigned int getNumberOfDevices(const std::string &engine_type) override;
+       const std::string &getDeviceType(const std::string &engine_type, unsigned int device_index) override;
+       void prepare() override;
+       void perform(InputBaseType &input) override;
+       void performAsync(InputBaseType &input) override;
+       OutputBaseType &getOutput() override;
+       OutputBaseType &getOutputCache() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
index df1d68d0b0939425a611a3bd8c68aa8b48b56802..82b16198fca0792de0d0a1d724ca1fe8f3d1c4a4 100644 (file)
@@ -41,11 +41,13 @@ struct ImageSegmentationResult : public OutputBaseType {
        unsigned int height {};
        unsigned int pixel_size {};
        std::vector<unsigned char> data;
+       std::vector<std::string> labels;
 };
 
 enum class ImageSegmentationTaskType {
        IMAGE_SEGMENTATION_TASK_NONE = 0,
        SELFIE_SEGMENTATION,
+       DEEPLAB_V3_MOBILENET_V2
        // TODO
 };
 
diff --git a/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json b/mv_machine_learning/image_segmentation/meta/semantic_segmentation.json
new file mode 100644 (file)
index 0000000..d2020a2
--- /dev/null
@@ -0,0 +1,40 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "MODEL_DEFAULT_PATH",
+            "type" : "string",
+            "value" : "/opt/usr/globalapps/mediavision.image.segmentation/models/hailo8l/"
+        },
+               {
+            "name"  : "MODEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "deeplab_v3_mobilenet_v2.hef"
+        },
+        {
+            "name"  : "DEFAULT_MODEL_NAME",
+            "type"  : "string",
+            "value" : "DEEPLAB_V3_MOBILENET_V2"
+        },
+        {
+            "name"  : "MODEL_META_FILE_NAME",
+            "type"  : "string",
+            "value" : "deeplab_v3_mobilenet_v2.json"
+        },
+        {
+            "name"  : "MODEL_LABEL_FILE_NAME",
+            "type"  : "string",
+            "value" : "deeplab_v3_mobilenet_v2.txt"
+        },
+        {
+            "name"  : "BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 7
+        },
+        {
+            "name"  : "TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 4
+        }
+    ]
+}
diff --git a/mv_machine_learning/image_segmentation/meta/semantic_segmentation_plugin.json b/mv_machine_learning/image_segmentation/meta/semantic_segmentation_plugin.json
new file mode 100644 (file)
index 0000000..a90da5d
--- /dev/null
@@ -0,0 +1,20 @@
+{
+    "attributes":
+    [
+        {
+            "name" : "PLUGIN_NAME",
+            "type" : "string",
+            "value" : "libsemantic_segmentation_plugin.so"
+        },
+        {
+            "name"  : "DEFAULT_MODEL_NAME",
+            "type"  : "string",
+            "value" : "SEMANTIC_SEGMENTATION"
+        },
+        {
+            "name"  : "USE_PLUGIN",
+            "type"  : "boolean",
+            "value" : false
+        }
+    ]
+}
diff --git a/mv_machine_learning/image_segmentation/src/DeeplabV3.cpp b/mv_machine_learning/image_segmentation/src/DeeplabV3.cpp
new file mode 100644 (file)
index 0000000..ed284fd
--- /dev/null
@@ -0,0 +1,93 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <string.h>
+
+#include "DeeplabV3.h"
+#include "MvMlException.h"
+#include "Postprocess.h"
+#include "mv_image_segmentation_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+// Passes the configuration on to the ImageSegmentation<T> base class and
+// value-initializes the result member.
+template<typename T>
+DeeplabV3<T>::DeeplabV3(std::shared_ptr<Config> config) : ImageSegmentation<T>(config), _result()
+{
+}
+
+// Empty destructor body: no explicit cleanup is performed here.
+template<typename T>
+DeeplabV3<T>::~DeeplabV3()
+{
+}
+
+template<typename T> ImageSegmentationResult &DeeplabV3<T>::result()
+{
+       // Clear _result object because result() function can be called every time user wants
+       // so make sure to clear existing result data before getting the data again.
+       _result = ImageSegmentationResult();
+
+       vector<string> names;
+
+       ImageSegmentation<T>::getOutputNames(names);
+
+       vector<T> outputTensor;
+
+       ImageSegmentation<T>::getOutputTensor(names[0], outputTensor);
+
+       auto &outputMetaMap = _config->getOutputMetaMap();
+       auto &metaInfo = outputMetaMap["deeplab_v3_mobilenet_v2/resize2"];
+
+       auto height = metaInfo->dims[1];
+       auto width = metaInfo->dims[2];
+       auto channel = metaInfo->dims[3];
+
+       for (unsigned int h = 0; h < height; ++h) {
+               for (unsigned int w = 0; w < width; ++w) {
+                       unsigned char max_value = 0;
+                       unsigned char top_index = 0;
+
+                       for (unsigned int c = 0; c < channel; ++c) {
+                               if (max_value < outputTensor[(h * width * channel) + (w * channel ) + c]) {
+                                       max_value = outputTensor[(h * width * channel) + (w * channel ) + c];
+                                       top_index = c;
+                               }
+                       }
+
+                       if (max_value > 150)
+                               _result.data.push_back(top_index);
+                       else
+                               _result.data.push_back(255); // If 255 then it means there is no object in that pixel.
+               }
+       }
+
+       _result.height = height;
+       _result.width = width;
+       _result.pixel_size = channel;
+       _result.labels = _labels;
+
+       return _result;
+}
+
+// Explicit instantiation: the member definitions above live in this source file,
+// and unsigned char is the only element type instantiated here.
+template class DeeplabV3<unsigned char>;
+}
+}
index de36f49e05a74b706268a00ad7576efcf13d87f2..8beaa63a89dba45185668622da02bda416c8d945 100644 (file)
@@ -234,19 +234,23 @@ template<typename T> void ImageSegmentation<T>::configurePreprocess()
                                                                metaInfo->getWidth(),
                                                                metaInfo->getHeight() };
 
-       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
-       if (normalization) {
-               config.normalize = normalization->use;
-               config.mean = normalization->mean;
-               config.std = normalization->std;
+       if (metaInfo->decodingTypeMap.find(DecodingType::NORMAL) != metaInfo->decodingTypeMap.end()) {
+               auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+               if (normalization) {
+                       config.normalize = normalization->use;
+                       config.mean = normalization->mean;
+                       config.std = normalization->std;
+               }
        }
 
-       auto quantization =
-                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
-       if (quantization) {
-               config.quantize = quantization->use;
-               config.scale = quantization->scale;
-               config.zeropoint = quantization->zeropoint;
+       if (metaInfo->decodingTypeMap.find(DecodingType::QUANTIZATION) != metaInfo->decodingTypeMap.end()) {
+               auto quantization =
+                               static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+               if (quantization) {
+                       config.quantize = quantization->use;
+                       config.scale = quantization->scale;
+                       config.zeropoint = quantization->zeropoint;
+               }
        }
 
        _preprocess.setConfig(config);
@@ -342,7 +346,7 @@ template<typename T> void ImageSegmentation<T>::getOutputNames(vector<string> &n
                names.push_back(it->first);
 }
 
-template<typename T> void ImageSegmentation<T>::getOutputTensor(string target_name, vector<float> &tensor)
+template<typename T> void ImageSegmentation<T>::getOutputTensor(string target_name, vector<T> &tensor)
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
 
@@ -350,9 +354,9 @@ template<typename T> void ImageSegmentation<T>::getOutputTensor(string target_na
        if (!tensor_buffer)
                throw InvalidOperation("Fail to get tensor buffer.");
 
-       auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);
+       auto raw_buffer = static_cast<T *>(tensor_buffer->buffer);
 
-       copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
+       copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(T)], back_inserter(tensor));
 }
 
 template class ImageSegmentation<unsigned char>;
diff --git a/mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp b/mv_machine_learning/image_segmentation/src/SemanticSegmentationAdapter.cpp
new file mode 100644 (file)
index 0000000..d6abfe6
--- /dev/null
@@ -0,0 +1,169 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SemanticSegmentationAdapter.h"
+#include "DeeplabV3.h"
+#include "MvMlException.h"
+#include "mv_image_segmentation_config.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+/**
+ * Creates the configuration object, parses the plugin configuration file and, unless a plugin
+ * is used, the regular configuration file, then creates the task for the default model name.
+ */
+SemanticSegmentationAdapter::SemanticSegmentationAdapter()
+{
+       _config = make_shared<Config>();
+
+       // The plugin configuration is always parsed first. When it reports that an external plugin
+       // is used, Mediavision's own configuration file is skipped because the plugin relies on
+       // its own meta file approach.
+       _config->parsePluginConfigFile(_plugin_config_file_name);
+
+       const bool plugin_used = _config->isPluginUsed();
+       if (!plugin_used)
+               _config->parseConfigFile(_config_file_name);
+
+       const auto &default_model_name = _config->getDefaultModelName();
+
+       create(default_model_name);
+}
+
+/**
+ * Gives the task object a chance to clean up before it is destroyed.
+ */
+SemanticSegmentationAdapter::~SemanticSegmentationAdapter()
+{
+       // create() returns without creating a task object when a plugin is used,
+       // so the pointer has to be checked before it is dereferenced.
+       if (_semantic_segmentation)
+               _semantic_segmentation->preDestroy();
+}
+
+template<typename U> void SemanticSegmentationAdapter::create(ImageSegmentationTaskType task_type)
+{
+       switch (task_type) {
+       case ImageSegmentationTaskType::DEEPLAB_V3_MOBILENET_V2:
+               _semantic_segmentation = make_unique<DeeplabV3<U> >(_config);
+               break;
+       default:
+               throw InvalidOperation("Invalid semantic segmentation task type.");
+       }
+       // TODO.
+}
+
+void SemanticSegmentationAdapter::create(const std::string &model_name)
+{
+       auto task_type = convertToTaskType(model_name.empty() ? _config->getDefaultModelName() : model_name);
+
+       if (_config->isPluginUsed()) {
+               // TODO.
+               return;
+       }
+
+       _config->loadMetaFile(make_unique<ImageSegmentationParser>(static_cast<int>(task_type)));
+       mv_inference_data_type_e dataType = _config->getInputMetaMap().begin()->second->dataType;
+
+       switch (dataType) {
+       case MV_INFERENCE_DATA_UINT8:
+               create<unsigned char>(task_type);
+               break;
+       default:
+               throw InvalidOperation("Invalid semantic segmentation data type.");
+       }
+}
+
+/**
+ * Converts a model name to its task type. The comparison is case-insensitive.
+ *
+ * @param model_name Model name; taken by value because it is upper-cased in place.
+ * @return Task type matching the model name.
+ * @throw InvalidParameter if the name is empty or does not match a known model.
+ */
+ImageSegmentationTaskType SemanticSegmentationAdapter::convertToTaskType(string model_name)
+{
+       if (model_name.empty())
+               throw InvalidParameter("model name is empty.");
+
+       // toupper() must not be called with a negative value other than EOF, so every character
+       // is passed as unsigned char.
+       transform(model_name.begin(), model_name.end(), model_name.begin(),
+                         [](unsigned char c) { return static_cast<char>(::toupper(c)); });
+
+       if (model_name == "DEEPLAB_V3_MOBILENET_V2")
+               return ImageSegmentationTaskType::DEEPLAB_V3_MOBILENET_V2;
+
+       throw InvalidParameter("Invalid semantic segmentation model name.");
+}
+
+void SemanticSegmentationAdapter::setModelInfo(const string &model_file, const string &meta_file, const string &label_file,
+                                                                                       const string &model_name)
+{
+       try {
+               _config->setUserModel(model_file, meta_file, label_file);
+               create(model_name);
+       } catch (const BaseException &e) {
+               LOGW("A given model name is invalid so default task type will be used.");
+       }
+
+       if (model_file.empty() && meta_file.empty()) {
+               LOGW("Given model info is invalid so default model info will be used instead.");
+               return;
+       }
+
+       _semantic_segmentation->setUserModel(model_file, meta_file, label_file);
+}
+
+// Passes the engine and device type on to the task object.
+void SemanticSegmentationAdapter::setEngineInfo(const string &engine_type, const string &device_type)
+{
+       auto &task = *_semantic_segmentation;
+
+       task.setEngineInfo(engine_type, device_type);
+}
+
+// Asks the task object to configure itself.
+void SemanticSegmentationAdapter::configure()
+{
+       auto &task = *_semantic_segmentation;
+
+       task.configure();
+}
+
+// Returns the number of engines reported by the task object.
+unsigned int SemanticSegmentationAdapter::getNumberOfEngines()
+{
+       auto &task = *_semantic_segmentation;
+
+       return task.getNumberOfEngines();
+}
+
+// Returns the engine type the task object reports for a given engine index.
+const string &SemanticSegmentationAdapter::getEngineType(unsigned int engine_index)
+{
+       auto &task = *_semantic_segmentation;
+
+       return task.getEngineType(engine_index);
+}
+
+// Returns the number of devices the task object reports for a given engine type.
+unsigned int SemanticSegmentationAdapter::getNumberOfDevices(const string &engine_type)
+{
+       auto &task = *_semantic_segmentation;
+
+       return task.getNumberOfDevices(engine_type);
+}
+
+// Returns the device type the task object reports for a given engine type and device index.
+const string &SemanticSegmentationAdapter::getDeviceType(const string &engine_type, unsigned int device_index)
+{
+       auto &task = *_semantic_segmentation;
+
+       return task.getDeviceType(engine_type, device_index);
+}
+
+// Asks the task object to prepare itself.
+void SemanticSegmentationAdapter::prepare()
+{
+       auto &task = *_semantic_segmentation;
+
+       task.prepare();
+}
+
+// Hands the inference source of a given input over to the task object.
+void SemanticSegmentationAdapter::perform(InputBaseType &input)
+{
+       auto &task = *_semantic_segmentation;
+
+       task.perform(input.inference_src);
+}
+
+// Returns the output object provided by the task object.
+OutputBaseType &SemanticSegmentationAdapter::getOutput()
+{
+       auto &task = *_semantic_segmentation;
+
+       return task.getOutput();
+}
+
+// Returns the cached output object provided by the task object.
+OutputBaseType &SemanticSegmentationAdapter::getOutputCache()
+{
+       auto &task = *_semantic_segmentation;
+
+       return task.getOutputCache();
+}
+
+// Hands a given input, viewed as an ImageSegmentationInput, over to the task object.
+void SemanticSegmentationAdapter::performAsync(InputBaseType &input)
+{
+       auto &task = *_semantic_segmentation;
+       auto &segmentation_input = static_cast<ImageSegmentationInput &>(input);
+
+       task.performAsync(segmentation_input);
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp b/mv_machine_learning/image_segmentation/src/mv_semantic_segmentation.cpp
new file mode 100644 (file)
index 0000000..cdda347
--- /dev/null
@@ -0,0 +1,337 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Context.h"
+#include "ITask.h"
+#include "SemanticSegmentationAdapter.h"
+#include "MvMlException.h"
+#include "image_segmentation_type.h"
+#include "mv_feature_key.h"
+#include "mv_private.h"
+#include "mv_semantic_segmentation_internal.h"
+#include "mv_semantic_segmentation_type.h"
+#include "native_capi.h"
+
+#include <algorithm>
+#include <exception>
+#include <iostream>
+#include <mutex>
+#include <new>
+#include <string>
+#include <unistd.h>
+
+#define TASK_NAME "semantic_segmentation"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::common;
+using namespace mediavision::machine_learning;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+static const char *feature_keys[] = { "http://tizen.org/feature/vision.inference.image",
+                                                                         "http://tizen.org/feature/vision.inference.face" };
+static const size_t num_keys = sizeof(feature_keys) / sizeof(char *);
+
+int mv_semantic_segmentation_create(mv_semantic_segmentation_h *handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_NULL_ARG_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       mv_semantic_segmentation_h ctx = nullptr;
+
+       try {
+               ctx = machine_learning_native_create();
+               machine_learning_native_add(ctx, TASK_NAME, new SemanticSegmentationAdapter());
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       } catch (const std::exception &e) {
+               LOGE("%s", e.what());
+               return MEDIA_VISION_ERROR_INTERNAL;
+       }
+
+       *handle = ctx;
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/**
+ * Destroys a semantic segmentation handle.
+ *
+ * @param handle Handle to destroy.
+ * @return MEDIA_VISION_ERROR_NONE on success, the error code carried by a BaseException, or
+ *         MEDIA_VISION_ERROR_INTERNAL for any other standard exception. The check macros at
+ *         the top may return earlier with their own error code.
+ */
+int mv_semantic_segmentation_destroy(mv_semantic_segmentation_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       // Same exception barrier as the other API functions of this file so that
+       // no C++ exception leaves this C function.
+       try {
+               machine_learning_native_destroy(handle);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       } catch (const std::exception &e) {
+               LOGE("%s", e.what());
+               return MEDIA_VISION_ERROR_INTERNAL;
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_set_model(mv_semantic_segmentation_h handle, const char *model_file, const char *meta_file,
+                                                                        const char *label_file, const char *model_name)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_set_model(handle, TASK_NAME, model_file, meta_file, label_file, model_name);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_set_engine(mv_semantic_segmentation_h handle, const char *backend_type,
+                                                                         const char *device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(backend_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_set_engine(handle, TASK_NAME, backend_type, device_type);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_get_engine_count(mv_semantic_segmentation_h handle, unsigned int *engine_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_get_engine_count(handle, TASK_NAME, engine_count);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_get_engine_type(mv_semantic_segmentation_h handle, const unsigned int engine_index,
+                                                                                  char **engine_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_get_engine_type(handle, TASK_NAME, engine_index, engine_type);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_get_device_count(mv_semantic_segmentation_h handle, const char *engine_type,
+                                                                                       unsigned int *device_count)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(device_count);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_get_device_count(handle, TASK_NAME, engine_type, device_count);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_get_device_type(mv_semantic_segmentation_h handle, const char *engine_type,
+                                                                                  const unsigned int device_index, char **device_type)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_NULL_ARG_CHECK(engine_type);
+       MEDIA_VISION_NULL_ARG_CHECK(device_type);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_get_device_type(handle, TASK_NAME, engine_type, device_index, device_type);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_configure(mv_semantic_segmentation_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_configure(handle, TASK_NAME);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_prepare(mv_semantic_segmentation_h handle)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               machine_learning_native_prepare(handle, TASK_NAME);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_inference(mv_semantic_segmentation_h handle, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               ImageSegmentationInput input(source);
+
+               machine_learning_native_inference(handle, TASK_NAME, input);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_inference_async(mv_semantic_segmentation_h handle, mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               ImageSegmentationInput input(source);
+
+               machine_learning_native_inference_async(handle, TASK_NAME, input);
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_semantic_segmentation_get_result_count(mv_semantic_segmentation_h handle, unsigned long *frame_number,
+                                                                                unsigned int *result_cnt)
+{ // NOTE(review): looks like a placeholder - no argument is checked and neither frame_number nor result_cnt is written.
+       return MEDIA_VISION_ERROR_NONE; // Success is always reported, so callers must not rely on the output arguments.
+}
+
+int mv_semantic_segmentation_get_result(mv_semantic_segmentation_h handle, unsigned int *width, unsigned int *height,
+                                                                         unsigned int *pixel_size, const unsigned char **data)
+{
+       MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true));
+       MEDIA_VISION_INSTANCE_CHECK(handle);
+       MEDIA_VISION_INSTANCE_CHECK(width);
+       MEDIA_VISION_INSTANCE_CHECK(height);
+       MEDIA_VISION_INSTANCE_CHECK(pixel_size);
+       MEDIA_VISION_INSTANCE_CHECK(data);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       try {
+               auto &result = static_cast<ImageSegmentationResult &>(machine_learning_native_get_result(handle, TASK_NAME));
+
+               *width = result.width;
+               *height = result.height;
+               *pixel_size = result.pixel_size;
+               *data = result.data.data();
+               // TODO
+       } catch (const BaseException &e) {
+               LOGE("%s", e.what());
+               return e.getError();
+       }
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return MEDIA_VISION_ERROR_NONE;
+}
\ No newline at end of file
index a0a80a2cdde64e8b7215fb8e480ab5cd6ff38588..bed108649a44c9d18d526b30845e1bdf8fd38399 100644 (file)
@@ -411,6 +411,8 @@ find . -name '*.gcno' -not -path "./test/*" -not -path "./mv_machine_learning/*"
 %if "%{enable_ml_image_segmentation}" == "1"
 %{_datadir}/%{name}/selfie_segmentation.json
 %{_datadir}/%{name}/selfie_segmentation_plugin.json
+%{_datadir}/%{name}/semantic_segmentation.json
+%{_datadir}/%{name}/semantic_segmentation_plugin.json
 %{_libdir}/libmv_image_segmentation.so
 %endif
 %if "%{enable_ml_gaze_tracking}" == "1"
@@ -472,6 +474,8 @@ find . -name '*.gcno' -not -path "./test/*" -not -path "./mv_machine_learning/*"
 %if "%{enable_ml_image_segmentation}" == "1"
 %{_includedir}/media/mv_selfie_segmentation_internal.h
 %{_includedir}/media/mv_selfie_segmentation_type.h
+%{_includedir}/media/mv_semantic_segmentation_internal.h
+%{_includedir}/media/mv_semantic_segmentation_type.h
 %{_includedir}/media/IImageSegmentation.h
 %{_includedir}/media/image_segmentation_type.h
 %{_libdir}/pkgconfig/*image-segmentation.pc
index fac131cd8c48d72e2cb5c54dc17e706791190a64..fc9c34daa905137f0b7ac92f08ad8ae6a05b623c 100644 (file)
@@ -1,4 +1,5 @@
 set(SRC_FILES
     ${SRC_FILES}
     testsuites/machine_learning/image_segmentation/test_selfie_segmentation.cpp
+    testsuites/machine_learning/image_segmentation/test_semantic_segmentation.cpp
 )
\ No newline at end of file
diff --git a/test/testsuites/machine_learning/image_segmentation/test_semantic_segmentation.cpp b/test/testsuites/machine_learning/image_segmentation/test_semantic_segmentation.cpp
new file mode 100644 (file)
index 0000000..cd68184
--- /dev/null
@@ -0,0 +1,78 @@
+/**
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "../task_model_info.hpp"
+#include "ImageHelper.h"
+#include "mv_semantic_segmentation_internal.h"
+
+#define IMG_FACE TEST_RES_PATH "/res/inference/images/image1.jpg"
+
+using namespace testing;
+using namespace std;
+using namespace MediaVision::Common;
+
+TEST(SemanticSegmentationTest, InferenceShouldBeOk)
+{
+       mv_semantic_segmentation_h handle;
+       vector<test_model_input> test_models {
+               { "", "", "", "" } // If empty then default model will be used.
+               // TODO.
+       };
+
+       mv_source_h mv_source = NULL;
+       int ret = mv_create_source(&mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       ret = ImageHelper::loadImageToSource(IMG_FACE, mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+       for (const auto &model : test_models) {
+               cout << "model name : " << model.model_file << endl;
+
+               ret = mv_semantic_segmentation_create(&handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_semantic_segmentation_configure(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_semantic_segmentation_prepare(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+
+               ret = mv_semantic_segmentation_inference(handle, mv_source);
+               ASSERT_EQ(ret, 0);
+
+               unsigned int width, height, pixel_size;
+               const unsigned char *data;
+
+               ret = mv_semantic_segmentation_get_result(handle, &width, &height, &pixel_size, &data);
+               ASSERT_EQ(ret, 0);
+
+               ret = mv_semantic_segmentation_destroy(handle);
+               ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+       }
+
+       ret = mv_destroy_source(mv_source);
+       ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE);
+}