inference: add support for image classification task

author Vibhav Aggarwal <v.aggarwal@samsung.com>

Tue, 11 Jun 2024 09:56:06 +0000 (18:56 +0900)

committer Inki Dae <inki.dae@samsung.com>

Wed, 12 Jun 2024 00:57:33 +0000 (09:57 +0900)
author Vibhav Aggarwal <v.aggarwal@samsung.com>
Tue, 11 Jun 2024 09:56:06 +0000 (18:56 +0900)
committer Inki Dae <inki.dae@samsung.com>
Wed, 12 Jun 2024 00:57:33 +0000 (09:57 +0900)
diff --git a/common/include/SingleoCommonTypes.h b/common/include/SingleoCommonTypes.h

index fd9a0fbb082e88a4f1c63b62c4d7d3f966b5a84d..e8308912455fc1a8396b3bc864c6255f1fed56b7 100644 (file)
--- a/common/include/SingleoCommonTypes.h
+++ b/common/include/SingleoCommonTypes.h
@@ -131,7 +131,7 @@ struct RawDataType : public BaseDataType {
         }
  };
  
-enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK };
+enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION };
  
  struct BaseResultType {
         ResultType _type { ResultType::NONE };
@@ -177,6 +177,17 @@ struct FldResultType : public BaseResultType {
         }
  };
  
+/**
+ * Result payload for the image classification task.
+ *
+ * Instances are tagged with ResultType::IMAGE_CLASSIFICATION and carry the
+ * classification labels as strings.
+ */
+struct IcResultType : public BaseResultType {
+       std::vector<std::string> _labels;
+
+       IcResultType() : BaseResultType(ResultType::IMAGE_CLASSIFICATION)
+       {}
+
+       /**
+        * Returns a shared pointer to a copy-constructed duplicate of this result.
+        */
+       std::shared_ptr<BaseResultType> clone() override
+       {
+               std::shared_ptr<BaseResultType> duplicate = std::make_shared<IcResultType>(*this);
+
+               return duplicate;
+       }
+};
+
  enum class ServiceType { NONE, AUTO_ZOOM };
  
  enum class InputFeedType { NONE, CAMERA, SCREEN_CAPTURE };
diff --git a/inference/backends/mediavision/CMakeLists.txt b/inference/backends/mediavision/CMakeLists.txt

index dfea22dcfacba025aac4ffe5681e6907f0b7a5c3..dffda896703a48a2f791e07565c505130c732a4f 100644 (file)
--- a/inference/backends/mediavision/CMakeLists.txt
+++ b/inference/backends/mediavision/CMakeLists.txt
@@ -10,8 +10,9 @@ SET(SINGLEO_SERVICE_SOURCE_FILES
      ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceDetection.cpp
      ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceLandmark.cpp
      ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvObjectDetection.cpp
+    ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvImageClassification.cpp
         ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceTaskFactory.cpp
  )
  
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification)
  LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
diff --git a/inference/backends/mediavision/include/MvImageClassification.h b/inference/backends/mediavision/include/MvImageClassification.h

new file mode 100644 (file)

index 0000000..8744118
--- /dev/null
+++ b/inference/backends/mediavision/include/MvImageClassification.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_IMAGE_CLASSIFICATION_H__
+#define __MV_IMAGE_CLASSIFICATION_H__
+
+#include "IInferenceTaskInterface.h"
+#include "mv_image_classification_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Image classification inference task backed by the Media Vision
+ * mv_image_classification API.
+ *
+ * The object holds a native mv_image_classification_h handle which is created
+ * in the constructor and destroyed in the destructor. Copying is disabled
+ * because two objects sharing one handle would destroy it twice.
+ */
+class MvImageClassification : public IInferenceTaskInterface
+{
+private:
+       mv_image_classification_h _handle {};
+       // Storage for the reference returned by result().
+       IcResultType _output_data {};
+
+public:
+       MvImageClassification();
+       virtual ~MvImageClassification();
+
+       // The handle has a single owner, so copies are not allowed.
+       MvImageClassification(const MvImageClassification &) = delete;
+       MvImageClassification &operator=(const MvImageClassification &) = delete;
+
+       void configure() override;
+       void prepare() override;
+       void invoke(BaseDataType &input, bool async) override;
+       BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
diff --git a/inference/backends/mediavision/src/MvImageClassification.cpp b/inference/backends/mediavision/src/MvImageClassification.cpp

new file mode 100644 (file)

index 0000000..ca1855b
--- /dev/null
+++ b/inference/backends/mediavision/src/MvImageClassification.cpp
@@ -0,0 +1,115 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "MvImageClassification.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Creates the native image classification handle.
+ *
+ * Throws std::runtime_error when mv_image_classification_create() does not
+ * return MEDIA_VISION_ERROR_NONE.
+ */
+MvImageClassification::MvImageClassification()
+{
+       const int status = mv_image_classification_create(&_handle);
+
+       if (status == MEDIA_VISION_ERROR_NONE)
+               return;
+
+       throw runtime_error("Fail to create image classification handle.");
+}
+
+/**
+ * Destroys the native image classification handle held by this object.
+ */
+MvImageClassification::~MvImageClassification()
+{
+       // The status code returned by the destroy call is not inspected.
+       mv_image_classification_destroy(_handle);
+}
+
+/**
+ * Configures the native image classification handle.
+ *
+ * Throws std::runtime_error when mv_image_classification_configure() does not
+ * return MEDIA_VISION_ERROR_NONE.
+ */
+void MvImageClassification::configure()
+{
+       if (mv_image_classification_configure(_handle) != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to configure image classification.");
+}
+
+/**
+ * Prepares the native image classification handle.
+ *
+ * Throws std::runtime_error when mv_image_classification_prepare() does not
+ * return MEDIA_VISION_ERROR_NONE.
+ */
+void MvImageClassification::prepare()
+{
+       const int status = mv_image_classification_prepare(_handle);
+       const bool prepared = (status == MEDIA_VISION_ERROR_NONE);
+
+       if (!prepared)
+               throw runtime_error("Fail to prepare image classification.");
+}
+
+/**
+ * Runs image classification on the given input image.
+ *
+ * @param input  Must be an ImageDataType whose _data_type is DataType::IMAGE.
+ *               The dynamic_cast on a reference throws std::bad_cast for any
+ *               other dynamic type.
+ * @param async  Not referenced by this implementation.
+ *
+ * @throws std::invalid_argument when the input's _data_type is not IMAGE.
+ * @throws std::runtime_error    when creating, filling or destroying the
+ *                               mv source fails, or when the inference call
+ *                               fails.
+ *
+ * A failure while filling the source or running inference is logged, the
+ * source is destroyed, and the error is then propagated to the caller instead
+ * of being silently dropped.
+ */
+void MvImageClassification::invoke(BaseDataType &input, bool async)
+{
+       ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+       if (data._data_type != DataType::IMAGE) {
+               SINGLEO_LOGE("Invalid input type.");
+               throw invalid_argument("Input type not support.");
+       }
+
+       mv_source_h mv_src {};
+
+       int ret = mv_create_source(&mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to create mv source.");
+
+       try {
+               // NOTE(review): the colorspace is fixed to RGB888 while the buffer size
+               // is computed from byte_per_pixel -- confirm callers always pass RGB888.
+               ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel, data.width,
+                                              data.height, MEDIA_VISION_COLORSPACE_RGB888);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to convert to mv source.");
+
+               ret = mv_image_classification_inference(_handle, mv_src);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to invoke image classification.");
+       } catch (const std::runtime_error &e) {
+               SINGLEO_LOGE("%s", e.what());
+               // Release the source before propagating; the original error takes
+               // precedence over any failure reported by the destroy call.
+               mv_destroy_source(mv_src);
+               throw;
+       }
+
+       ret = mv_destroy_source(mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to destroy mv source.");
+}
+
+/**
+ * Collects the labels produced by the native handle into _output_data.
+ *
+ * The previously stored labels are discarded, the frame number reported by
+ * mv_image_classification_get_result_count() is recorded, and one label per
+ * reported result is fetched with mv_image_classification_get_label().
+ *
+ * @return Reference to the internally stored IcResultType; it is overwritten
+ *         by the next call to result().
+ *
+ * @throws std::runtime_error when the result count or a label cannot be
+ *                            obtained, including the case where a label
+ *                            pointer comes back null.
+ */
+BaseResultType &MvImageClassification::result()
+{
+       unsigned long frame_number = 0;
+       unsigned int result_cnt = 0;
+
+       int ret = mv_image_classification_get_result_count(_handle, &frame_number, &result_cnt);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to get image classification result count.");
+
+       _output_data._labels.clear();
+       _output_data._labels.reserve(result_cnt);
+       _output_data._frame_number = frame_number;
+
+       for (unsigned int idx = 0; idx < result_cnt; ++idx) {
+               const char *label = nullptr;
+
+               ret = mv_image_classification_get_label(_handle, idx, &label);
+               // Building a std::string from a null pointer is undefined behavior,
+               // so a null label is treated the same as a failed call.
+               if (ret != MEDIA_VISION_ERROR_NONE || label == nullptr)
+                       throw runtime_error("Fail to get image classification label.");
+
+               _output_data._labels.emplace_back(label);
+       }
+
+       return _output_data;
+}
+
+}
+}
+}
diff --git a/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp b/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp

index 3f147a54b6f9e41004407faa1d0d482f280ee48a..76ae43fc4ad5c8f2ebeea9625c393fe952d5290f 100644 (file)
--- a/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp
+++ b/inference/backends/mediavision/src/MvInferenceTaskFactory.cpp
@@ -19,6 +19,7 @@
  #include "MvFaceDetection.h"
  #include "MvFaceLandmark.h"
  #include "MvObjectDetection.h"
+#include "MvImageClassification.h"
  #include "SingleoLog.h"
  #include "SingleoException.h"
  
@@ -35,7 +36,7 @@ bool MvInferenceTaskFactory::_registered =
  
  std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createImageClassification()
  {
-       throw InvalidOperation("Interface not supported yet.");
+       return make_unique<MvImageClassification>();
  }
  
  std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createObjectDetection()
author	Vibhav Aggarwal <v.aggarwal@samsung.com>
	Tue, 11 Jun 2024 09:56:06 +0000 (18:56 +0900)
committer	Inki Dae <inki.dae@samsung.com>
	Wed, 12 Jun 2024 00:57:33 +0000 (09:57 +0900)
common/include/SingleoCommonTypes.h		patch \| blob \| history
inference/backends/mediavision/CMakeLists.txt		patch \| blob \| history
inference/backends/mediavision/include/MvImageClassification.h	[new file with mode: 0644]	patch \| blob
inference/backends/mediavision/src/MvImageClassification.cpp	[new file with mode: 0644]	patch \| blob
inference/backends/mediavision/src/MvInferenceTaskFactory.cpp		patch \| blob \| history