inference: add face landmark detection support for Mediavision 10/310210/4 inferenceservice_redesign_#01
author Inki Dae <inki.dae@samsung.com>
Wed, 24 Apr 2024 06:03:10 +0000 (15:03 +0900)
committer Inki Dae <inki.dae@samsung.com>
Fri, 26 Apr 2024 01:59:15 +0000 (10:59 +0900)
Add face landmark detection support for Mediavision backend.

With this patch, move '_rects' member to each specific result structure,
and add _points to FldResultType for face landmark detection.

Change-Id: If0e801c6c01da247870e9ac3632d5953d90633c8
Signed-off-by: Inki Dae <inki.dae@samsung.com>
common/include/SingleoCommonTypes.h
inference/backends/mediavision/CMakeLists.txt
inference/backends/mediavision/include/MvFaceLandmark.h [new file with mode: 0644]
inference/backends/mediavision/src/MvFaceLandmark.cpp [new file with mode: 0644]
inference/backends/mediavision/src/MvInferenceServiceFactory.cpp
services/auto_zoom/src/AutoZoom.cpp

index d048ba4b3be7bd22485cf2fd9596b680ddb80c07..47ecb8839dce565040c3516b7546b9f003c67c9b 100644 (file)
@@ -28,6 +28,11 @@ struct Rect {
        int bottom {};
 };
 
+// 2D point with integer coordinates; both members are value-initialized to 0.
+struct Point {
+       int x {};
+       int y {};
+};
+
 using VecRect = std::vector<Rect>;
 
 enum class DataType { NONE, FILE, IMAGE, RAW };
@@ -65,12 +70,11 @@ struct RawDataType : public BaseDataType {
        size_t size_in_bytes {};
 };
 
-enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, LANDMARK };
+enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK };
 
 struct BaseResultType {
        ResultType _type { ResultType::NONE };
        unsigned int _frame_number {};
-       std::vector<Rect> _rects;
        BaseResultType(ResultType type) : _type(type)
        {}
        virtual ~BaseResultType()
@@ -80,13 +84,19 @@ struct BaseResultType {
 struct OdResultType : public BaseResultType {
        OdResultType() : BaseResultType(ResultType::OBJECT_DETECTION)
        {}
-       // TODO
+       // Rectangles carried by this result; the member now lives here instead of in BaseResultType.
+       std::vector<Rect> _rects;
 };
 
 struct FdResultType : public BaseResultType {
        FdResultType() : BaseResultType(ResultType::FACE_DETECTION)
        {}
-       // TODO
+       // Rectangles carried by this result; the member now lives here instead of in BaseResultType.
+       std::vector<Rect> _rects;
+};
+
+// Result type tagged ResultType::FACE_LANDMARK; holds the points produced
+// by face landmark detection.
+struct FldResultType : public BaseResultType {
+       FldResultType() : BaseResultType(ResultType::FACE_LANDMARK)
+       {}
+       std::vector<Point> _points;
 };
 
 enum class ServiceType { NONE, AUTO_ZOOM };
index 6cbc3001c592863c7234f6d8042d35524d6ec358..c630fb096ff2a89ab3103f4b4bb072a3d5321393 100644 (file)
@@ -8,9 +8,10 @@ SET(INFERENCE_MEDIAVISION_BACKEND_DIRECTORY ${INFERENCE_DIRECTORY}/backends/medi
 SET(SINGLEO_SERVICE_SOURCE_FILES
     ${SINGLEO_SERVICE_SOURCE_FILES}
     ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceDetection.cpp
+    ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceLandmark.cpp
     ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvObjectDetection.cpp
        ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceServiceFactory.cpp
 )
 
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection)
 LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
diff --git a/inference/backends/mediavision/include/MvFaceLandmark.h b/inference/backends/mediavision/include/MvFaceLandmark.h
new file mode 100644 (file)
index 0000000..e70f3e5
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_FACE_LANDMARK_H__
+#define __MV_FACE_LANDMARK_H__
+
+#include "IInferenceServiceInterface.h"
+#include "mv_facial_landmark_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * @brief Face landmark detection backend built on the Mediavision
+ *        mv_facial_landmark API.
+ *
+ * The object owns the native handle kept in _handle: the constructor creates
+ * it and the destructor destroys it. Copying is disabled because a copy would
+ * hold the same handle and destroy it a second time.
+ */
+class MvFaceLandmark : public IInferenceServiceInterface
+{
+private:
+       mv_facial_landmark_h _handle {}; // native handle, owned by this object
+       FldResultType _output_data; // storage returned by reference from result()
+
+public:
+       MvFaceLandmark();
+       virtual ~MvFaceLandmark();
+
+       // Non-copyable: _handle must be destroyed exactly once.
+       MvFaceLandmark(const MvFaceLandmark &) = delete;
+       MvFaceLandmark &operator=(const MvFaceLandmark &) = delete;
+
+       void configure() override;
+       void prepare() override;
+       void invoke(BaseDataType &input, bool async) override;
+       BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
diff --git a/inference/backends/mediavision/src/MvFaceLandmark.cpp b/inference/backends/mediavision/src/MvFaceLandmark.cpp
new file mode 100644 (file)
index 0000000..99e73d4
--- /dev/null
@@ -0,0 +1,116 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "SingleoInputManager.h"
+#include "MvFaceLandmark.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Creates the native face landmark handle and stores it in _handle.
+ *
+ * @throws std::runtime_error if mv_facial_landmark_create() does not return
+ *         MEDIA_VISION_ERROR_NONE.
+ */
+MvFaceLandmark::MvFaceLandmark()
+{
+       if (mv_facial_landmark_create(&_handle) != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to create face landmark detection handle.");
+}
+
+// Destroys the native handle created by the constructor. The return code of
+// mv_facial_landmark_destroy() is not checked.
+MvFaceLandmark::~MvFaceLandmark()
+{
+       mv_facial_landmark_destroy(_handle);
+}
+
+/**
+ * Forwards to mv_facial_landmark_configure() on the owned handle.
+ *
+ * @throws std::runtime_error if the call does not return MEDIA_VISION_ERROR_NONE.
+ */
+void MvFaceLandmark::configure()
+{
+       if (mv_facial_landmark_configure(_handle) != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to configure face landmark detection.");
+}
+
+/**
+ * Forwards to mv_facial_landmark_prepare() on the owned handle.
+ *
+ * @throws std::runtime_error if the call does not return MEDIA_VISION_ERROR_NONE.
+ */
+void MvFaceLandmark::prepare()
+{
+       if (mv_facial_landmark_prepare(_handle) != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to prepare face landmark detection.");
+}
+
+/**
+ * Runs face landmark inference on one image.
+ *
+ * The input is wrapped in a temporary mv_source_h, which is destroyed before
+ * this function returns or throws.
+ *
+ * @param input Must be an ImageDataType whose _data_type is DataType::IMAGE.
+ *              The buffer is handed to Mediavision as MEDIA_VISION_COLORSPACE_RGB888.
+ * @param async Not consulted by this implementation; mv_facial_landmark_inference()
+ *              is always the call that is made.
+ *
+ * @throws std::bad_cast         if input is not an ImageDataType (from dynamic_cast).
+ * @throws std::invalid_argument if the data type tag is not DataType::IMAGE.
+ * @throws std::runtime_error    if creating, filling or destroying the source
+ *                               fails, or if the inference call fails.
+ */
+void MvFaceLandmark::invoke(BaseDataType &input, bool async)
+{
+       ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+       if (data._data_type != DataType::IMAGE) {
+               SINGLEO_LOGE("Invalid input type.");
+               throw invalid_argument("Input type not support.");
+       }
+
+       mv_source_h mv_src {};
+
+       int ret = mv_create_source(&mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to create mv source.");
+
+       try {
+               ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel, data.width,
+                                                                          data.height, MEDIA_VISION_COLORSPACE_RGB888);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to convert to mv source.");
+
+               ret = mv_facial_landmark_inference(_handle, mv_src);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to invoke face landmark detection.");
+       } catch (const std::runtime_error &e) {
+               SINGLEO_LOGE("%s", e.what());
+
+               // Release the source, then let the caller see the failure instead of
+               // returning as if inference had succeeded. The destroy result is
+               // ignored here so that it cannot mask the original error.
+               mv_destroy_source(mv_src);
+               throw;
+       }
+
+       ret = mv_destroy_source(mv_src);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to destroy mv source.");
+}
+
+/**
+ * Collects the landmark points of the latest inference into _output_data.
+ *
+ * The previous contents of _output_data._points are discarded, the frame
+ * number reported by Mediavision is stored, and one Point is appended per
+ * reported result index.
+ *
+ * @return Reference to the internal FldResultType; it is overwritten by the
+ *         next call to result().
+ *
+ * @throws std::runtime_error if the result count or any position cannot be
+ *         queried. In the latter case _output_data holds the points fetched
+ *         so far.
+ */
+BaseResultType &MvFaceLandmark::result()
+{
+       unsigned long frame_number {};
+       unsigned int result_cnt {};
+
+       int ret = mv_facial_landmark_get_result_count(_handle, &frame_number, &result_cnt);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw runtime_error("Fail to get face landmark detection result count.");
+
+       _output_data._points.clear();
+       _output_data._points.reserve(result_cnt);
+       // _frame_number is an unsigned int; make the narrowing explicit.
+       _output_data._frame_number = static_cast<unsigned int>(frame_number);
+
+       for (unsigned int idx = 0; idx < result_cnt; ++idx) {
+               // Read into correctly typed locals instead of casting the addresses of
+               // the int members of Point to unsigned int *.
+               unsigned int pos_x {};
+               unsigned int pos_y {};
+
+               ret = mv_facial_landmark_get_position(_handle, idx, &pos_x, &pos_y);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw runtime_error("Fail to get face landmark detection point.");
+
+               Point point;
+               point.x = static_cast<int>(pos_x);
+               point.y = static_cast<int>(pos_y);
+
+               _output_data._points.push_back(point);
+       }
+
+       return _output_data;
+}
+
+}
+}
+}
index 5ff3b7b8507fa99c41620a1f751e07c8b24676cb..c35e1d9ba17447e49454f538b03450428c29749c 100644 (file)
@@ -17,6 +17,7 @@
 #include "InferenceServiceFactory.h"
 #include "MvInferenceServiceFactory.h"
 #include "MvFaceDetection.h"
+#include "MvFaceLandmark.h"
 #include "MvObjectDetection.h"
 #include "SingleoLog.h"
 #include "SingleoException.h"
@@ -49,7 +50,7 @@ std::unique_ptr<IInferenceServiceInterface> MvInferenceServiceFactory::createFac
 
 std::unique_ptr<IInferenceServiceInterface> MvInferenceServiceFactory::createFaceLandmarkDetection()
 {
-       throw InvalidOperation("Interface not supported yet.");
+       // Hand back a newly constructed MvFaceLandmark backend, owned by the caller.
+       return make_unique<MvFaceLandmark>();
 }
 
 }
index bf3c5df25c809568dbc4dce8cf2be1682f4f4271..d9575b57ad2ba470b5c4a4ef30bd4038d4163ffb 100644 (file)
@@ -188,13 +188,13 @@ void AutoZoom::performAsync()
 void AutoZoom::updateResult(BaseDataType &in_data)
 {
        auto &output_data = _inference_service->result();
-       AutoZoomResult autozoom_result;
-       vector<Rect> rects;
 
        if (output_data._type != ResultType::OBJECT_DETECTION && output_data._type != ResultType::FACE_DETECTION)
                throw InvalidParameter("Invalid result type");
 
-       rects = output_data._rects;
+       vector<Rect> &rects = dynamic_cast<FdResultType &>(output_data)._rects;
+       AutoZoomResult autozoom_result;
+
        autozoom_result.frame_number = output_data._frame_number;
        autozoom_result.num_of_objects = rects.size();