From: Inki Dae
Date: Wed, 24 Apr 2024 06:03:10 +0000 (+0900)
Subject: inference: add face landmark detection support for Mediavision
X-Git-Tag: accepted/tizen/unified/20240903.110722~59
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Fchanges%2F10%2F310210%2F4;p=platform%2Fcore%2Fapi%2Fsingleo.git

inference: add face landmark detection support for Mediavision

Add face landmark detection support for Mediavision backend.

With this patch, move '_rects' member to each specific result structure,
and add _points to FldResultType for face landmark detection.

Change-Id: If0e801c6c01da247870e9ac3632d5953d90633c8
Signed-off-by: Inki Dae
---

diff --git a/common/include/SingleoCommonTypes.h b/common/include/SingleoCommonTypes.h
index d048ba4..47ecb88 100644
--- a/common/include/SingleoCommonTypes.h
+++ b/common/include/SingleoCommonTypes.h
@@ -28,6 +28,11 @@ struct Rect {
 	int bottom {};
 };
 
+struct Point {
+	int x {};
+	int y {};
+};
+
 using VecRect = std::vector<Rect>;
 
 enum class DataType { NONE, FILE, IMAGE, RAW };
@@ -65,12 +70,11 @@ struct RawDataType : public BaseDataType {
 	size_t size_in_bytes {};
 };
 
-enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, LANDMARK };
+enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK };
 
 struct BaseResultType {
 	ResultType _type { ResultType::NONE };
 	unsigned int _frame_number {};
-	std::vector<Rect> _rects;
 	BaseResultType(ResultType type) : _type(type)
 	{}
 	virtual ~BaseResultType()
@@ -80,13 +84,19 @@ struct BaseResultType {
 struct OdResultType : public BaseResultType {
 	OdResultType() : BaseResultType(ResultType::OBJECT_DETECTION)
 	{}
-	// TODO
+	std::vector<Rect> _rects;
 };
 
 struct FdResultType : public BaseResultType {
 	FdResultType() : BaseResultType(ResultType::FACE_DETECTION)
 	{}
-	// TODO
+	std::vector<Rect> _rects;
+};
+
+struct FldResultType : public BaseResultType {
+	FldResultType() : BaseResultType(ResultType::FACE_LANDMARK)
+	{}
+	std::vector<Point> _points;
 };
 
 enum class ServiceType { NONE, AUTO_ZOOM };
diff --git a/inference/backends/mediavision/CMakeLists.txt b/inference/backends/mediavision/CMakeLists.txt
index 6cbc300..c630fb0 100644
--- a/inference/backends/mediavision/CMakeLists.txt
+++ b/inference/backends/mediavision/CMakeLists.txt
@@ -8,9 +8,10 @@ SET(INFERENCE_MEDIAVISION_BACKEND_DIRECTORY ${INFERENCE_DIRECTORY}/backends/medi
 SET(SINGLEO_SERVICE_SOURCE_FILES
     ${SINGLEO_SERVICE_SOURCE_FILES}
     ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceDetection.cpp
+    ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceLandmark.cpp
     ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvObjectDetection.cpp
     ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceServiceFactory.cpp
 )
 
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection)
 LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
diff --git a/inference/backends/mediavision/include/MvFaceLandmark.h b/inference/backends/mediavision/include/MvFaceLandmark.h
new file mode 100644
index 0000000..e70f3e5
--- /dev/null
+++ b/inference/backends/mediavision/include/MvFaceLandmark.h
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_FACE_LANDMARK_H__
+#define __MV_FACE_LANDMARK_H__
+
+#include "IInferenceServiceInterface.h"
+#include "mv_facial_landmark_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Face landmark detection service implemented on top of the Mediavision
+ * mv_facial_landmark_* API.
+ *
+ * Each instance owns one Mediavision handle, created in the constructor and
+ * destroyed in the destructor, so instances are not copyable.
+ */
+class MvFaceLandmark : public IInferenceServiceInterface
+{
+private:
+	mv_facial_landmark_h _handle {};
+	FldResultType _output_data;
+
+public:
+	MvFaceLandmark();
+	virtual ~MvFaceLandmark();
+
+	// Copying would make two objects destroy the same handle.
+	MvFaceLandmark(const MvFaceLandmark &) = delete;
+	MvFaceLandmark &operator=(const MvFaceLandmark &) = delete;
+
+	void configure() override;
+	void prepare() override;
+	void invoke(BaseDataType &input, bool async) override;
+	BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
diff --git a/inference/backends/mediavision/src/MvFaceLandmark.cpp b/inference/backends/mediavision/src/MvFaceLandmark.cpp
new file mode 100644
index 0000000..99e73d4
--- /dev/null
+++ b/inference/backends/mediavision/src/MvFaceLandmark.cpp
@@ -0,0 +1,166 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "SingleoInputManager.h"
+#include "MvFaceLandmark.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Creates the Mediavision facial landmark handle owned by this object.
+ *
+ * @throws std::runtime_error if the handle cannot be created.
+ */
+MvFaceLandmark::MvFaceLandmark()
+{
+	int ret = mv_facial_landmark_create(&_handle);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to create face landmark detection handle.");
+}
+
+/**
+ * Destroys the Mediavision handle. A destructor must not throw, so a failure
+ * is only logged.
+ */
+MvFaceLandmark::~MvFaceLandmark()
+{
+	int ret = mv_facial_landmark_destroy(_handle);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		SINGLEO_LOGE("Fail to destroy face landmark detection handle.");
+}
+
+/**
+ * Configures the Mediavision handle via mv_facial_landmark_configure().
+ *
+ * @throws std::runtime_error if Mediavision reports an error.
+ */
+void MvFaceLandmark::configure()
+{
+	int ret = mv_facial_landmark_configure(_handle);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to configure face landmark detection.");
+}
+
+/**
+ * Prepares the Mediavision handle via mv_facial_landmark_prepare().
+ *
+ * @throws std::runtime_error if Mediavision reports an error.
+ */
+void MvFaceLandmark::prepare()
+{
+	int ret = mv_facial_landmark_prepare(_handle);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to prepare face landmark detection.");
+}
+
+/**
+ * Runs face landmark detection on one image.
+ *
+ * @param input Image to process; it must be an ImageDataType whose _data_type
+ *              is DataType::IMAGE. The buffer is handed to Mediavision as RGB888.
+ * @param async Currently ignored; the inference call is always made directly.
+ * @throws std::bad_cast if input is not an ImageDataType.
+ * @throws std::invalid_argument if input is not tagged as an image.
+ * @throws std::runtime_error if a Mediavision call fails.
+ */
+void MvFaceLandmark::invoke(BaseDataType &input, bool async)
+{
+	ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+	if (data._data_type != DataType::IMAGE) {
+		SINGLEO_LOGE("Invalid input type.");
+		throw invalid_argument("Input type not support.");
+	}
+
+	mv_source_h mv_src {};
+
+	int ret = mv_create_source(&mv_src);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to create mv source.");
+
+	try {
+		ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel, data.width,
+									   data.height, MEDIA_VISION_COLORSPACE_RGB888);
+		if (ret != MEDIA_VISION_ERROR_NONE)
+			throw runtime_error("Fail to convert to mv source.");
+
+		ret = mv_facial_landmark_inference(_handle, mv_src);
+		if (ret != MEDIA_VISION_ERROR_NONE)
+			throw runtime_error("Fail to invoke face landmark detection.");
+	} catch (const std::runtime_error &e) {
+		SINGLEO_LOGE("%s", e.what());
+		// Release the source, then propagate: if the failure were swallowed
+		// here the caller could not tell that no new result was produced.
+		mv_destroy_source(mv_src);
+		throw;
+	}
+
+	ret = mv_destroy_source(mv_src);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to destroy mv source.");
+}
+
+/**
+ * Collects the landmark points reported by Mediavision into the result object.
+ *
+ * @return Reference to the internal FldResultType, which is refilled on every
+ *         call; it is only valid while this object is alive.
+ * @throws std::runtime_error if Mediavision cannot provide the result.
+ */
+BaseResultType &MvFaceLandmark::result()
+{
+	unsigned long frame_number {};
+	unsigned int result_cnt {};
+
+	int ret = mv_facial_landmark_get_result_count(_handle, &frame_number, &result_cnt);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to get face landmark detection result count.");
+
+	_output_data._points.clear();
+	_output_data._points.reserve(result_cnt);
+	// _frame_number is an unsigned int, so make the conversion explicit.
+	_output_data._frame_number = static_cast<unsigned int>(frame_number);
+
+	for (unsigned int idx = 0; idx < result_cnt; ++idx) {
+		// Mediavision writes unsigned coordinates; receive them in matching
+		// locals instead of casting the addresses of Point's int members.
+		unsigned int pos_x {};
+		unsigned int pos_y {};
+
+		ret = mv_facial_landmark_get_position(_handle, idx, &pos_x, &pos_y);
+		if (ret != MEDIA_VISION_ERROR_NONE)
+			throw runtime_error("Fail to get face landmark detection point.");
+
+		Point point;
+		point.x = static_cast<int>(pos_x);
+		point.y = static_cast<int>(pos_y);
+		_output_data._points.push_back(point);
+	}
+
+	return _output_data;
+}
+
+}
+}
+}
diff --git a/inference/backends/mediavision/src/MvInferenceServiceFactory.cpp b/inference/backends/mediavision/src/MvInferenceServiceFactory.cpp
index 5ff3b7b..c35e1d9 100644
--- a/inference/backends/mediavision/src/MvInferenceServiceFactory.cpp
+++ b/inference/backends/mediavision/src/MvInferenceServiceFactory.cpp
@@ -17,6 +17,7 @@
 #include "InferenceServiceFactory.h"
 #include "MvInferenceServiceFactory.h"
 #include "MvFaceDetection.h"
+#include "MvFaceLandmark.h"
 #include "MvObjectDetection.h"
 #include "SingleoLog.h"
 #include "SingleoException.h"
@@ -49,7 +50,7 @@ std::unique_ptr<IInferenceServiceInterface> MvInferenceServiceFactory::createFac
 
 std::unique_ptr<IInferenceServiceInterface> MvInferenceServiceFactory::createFaceLandmarkDetection()
 {
-	throw InvalidOperation("Interface not supported yet.");
+	return make_unique<MvFaceLandmark>();
 }
 
 }
diff --git a/services/auto_zoom/src/AutoZoom.cpp b/services/auto_zoom/src/AutoZoom.cpp
index bf3c5df..d9575b5 100644
--- a/services/auto_zoom/src/AutoZoom.cpp
+++ b/services/auto_zoom/src/AutoZoom.cpp
@@ -188,13 +188,13 @@ void AutoZoom::performAsync()
 void AutoZoom::updateResult(BaseDataType &in_data)
 {
 	auto &output_data = _inference_service->result();
-	AutoZoomResult autozoom_result;
-	vector rects;
 
 	if (output_data._type != ResultType::OBJECT_DETECTION && output_data._type != ResultType::FACE_DETECTION)
 		throw
InvalidParameter("Invalid result type"); - rects = output_data._rects; + vector &rects = dynamic_cast(output_data)._rects; + AutoZoomResult autozoom_result; + autozoom_result.frame_number = output_data._frame_number; autozoom_result.num_of_objects = rects.size();