int bottom {};
};
+struct Point { // Coordinate triple used as the element type of FldResultType::_landmarks; every member starts at 0.
+ unsigned int x {}; // filled by mv_facial_landmark_get_position() in MvFaceLandmarkDetectionMulti::result()
+ unsigned int y {}; // filled together with x
+ unsigned int z {}; // not written by any code visible in this change, so it keeps its initial 0
+};
+
using VecRect = std::vector<Rect>;
enum class DataType { NONE, FILE, IMAGE, RAW };
// TODO
};
+struct FldResultType : public BaseResultType { // Result carrier for face landmark detection.
+ std::vector<Point> _landmarks; // one Point per landmark position reported by the backend
+ FldResultType() : BaseResultType(ResultType::LANDMARK) // tags the base part with ResultType::LANDMARK
+ {}
+ // TODO
+};
+
enum class ServiceType { NONE, AUTO_ZOOM, SMART_POINTER };
enum class InputFeedType { NONE, CAMERA, SCREEN_CAPTURE };
ADD_LIBRARY(${PROJECT_NAME} SHARED ${MEDIAVISION_SOURCE_FILES})
FIND_PACKAGE(PkgConfig REQUIRED)
-PKG_CHECK_MODULES(${PROJECT_NAME}_DEP REQUIRED capi-media-vision)
+PKG_CHECK_MODULES(${PROJECT_NAME}_DEP REQUIRED capi-media-vision opencv)
TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE ../include ../../common/include ../../log/include mediavision/include /usr/include/media)
-TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE mv_common singleo_log mv_inference mv_object_detection)
+TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE mv_common singleo_log ${${PROJECT_NAME}_DEP_LIBRARIES} )
# Install the library
INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
void configure() override;
void prepare() override;
- void invoke(BaseDataType &input, bool async, BaseResultType auxiliary_input = BaseResultType(ResultType::NONE)) override;
+ void invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async) override;
BaseResultType &result() override;
};
void configure() override;
void prepare() override;
- void invoke(BaseDataType &input, bool async, BaseResultType auxiliary_input = BaseResultType(ResultType::NONE)) override;
+ void invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async) override;
BaseResultType &result() override;
};
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACE_LANDMARK_DETECTION_MULTI_H__
+#define __FACE_LANDMARK_DETECTION_MULTI_H__
+
+#include <thread>
+#include "IInferenceTaskInterface.h"
+#include "mv_facial_landmark_internal.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+class MvFaceLandmarkDetectionMulti : public IInferenceTaskInterface // Face landmark task backed by the Media Vision facial-landmark API.
+{
+private:
+ mv_facial_landmark_h _handle {}; // created in the constructor, destroyed in the destructor
+ FldResultType _output_data {}; // refilled by result() and handed out by reference
+ mv_source_h _mv_src {}; // reused on every invoke(); filled from the image buffer chosen by cropFaceRegion()
+
+ void cropFaceRegion(BaseDataType& input, ImageDataType& output, BaseResultType& roi); // whole frame when roi has no rects, otherwise the region of roi._rects[0]
+
+public:
+ MvFaceLandmarkDetectionMulti(); // throws std::runtime_error when the handle or the source cannot be created
+ virtual ~MvFaceLandmarkDetectionMulti(); // NOTE(review): raw handles are owned but copy operations are not deleted — confirm instances are never copied
+
+ void configure() override; // throws std::runtime_error on failure
+ void prepare() override; // throws std::runtime_error on failure
+ void invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async) override; // auxiliary_input supplies the face rectangle; async is not used by the implementation
+ BaseResultType &result() override; // returns a reference to _output_data
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
void configure() override;
void prepare() override;
- void invoke(BaseDataType &input, bool async, BaseResultType auxiliary_input = BaseResultType(ResultType::NONE)) override;
+ void invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async) override;
BaseResultType &result() override;
};
throw runtime_error("Fail to prepare face detection.");
}
-void MvFaceDetection::invoke(BaseDataType &input, bool async, BaseResultType auxiliary_input)
+void MvFaceDetection::invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async)
{
ImageDataType &data = dynamic_cast<ImageDataType &>(input);
}
-void MvFaceDetectionMulti::invoke(BaseDataType &input, bool async, BaseResultType auxiliary_input)
+void MvFaceDetectionMulti::invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async)
{
ImageDataType &data = dynamic_cast<ImageDataType &>(input);
throw runtime_error("Fail to get face detection bound box.");
_output_data._rects.push_back(rect);
+ SINGLEO_LOGD("idx[%2zd]: (%3zd, %3zd, %3zd, %3zd)", idx, rect.left, rect.top, rect.right, rect.bottom);
}
return _output_data;
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include <opencv2/core.hpp>
+#include "SingleoInputManager.h"
+#include "MvFaceLandmarkDetectionMulti.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+/**
+ * Creates the Media Vision facial-landmark handle and the mv_source that is
+ * reused for every inference call.
+ *
+ * @throws std::runtime_error if either Media Vision object cannot be created.
+ */
+MvFaceLandmarkDetectionMulti::MvFaceLandmarkDetectionMulti()
+{
+	int ret = mv_facial_landmark_create(&_handle);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to create face landmark detection handle.");
+
+	ret = mv_create_source(&_mv_src);
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		// The destructor does not run when a constructor throws, so the handle
+		// created above has to be released here or it would leak.
+		mv_facial_landmark_destroy(_handle);
+		_handle = {};
+		throw runtime_error("Fail to create mv source.");
+	}
+}
+
+MvFaceLandmarkDetectionMulti::~MvFaceLandmarkDetectionMulti() // Releases the two Media Vision objects created by the constructor.
+{
+ mv_facial_landmark_destroy(_handle); // result of the call is not checked
+ mv_destroy_source(_mv_src); // result of the call is not checked
+}
+
+/**
+ * Runs the Media Vision configuration step on the facial-landmark handle.
+ *
+ * @throws std::runtime_error if mv_facial_landmark_configure() does not
+ *         report MEDIA_VISION_ERROR_NONE.
+ */
+void MvFaceLandmarkDetectionMulti::configure()
+{
+	const bool configured = (mv_facial_landmark_configure(_handle) == MEDIA_VISION_ERROR_NONE);
+
+	if (!configured)
+		throw runtime_error("Fail to configure face landmark detection.");
+}
+
+/**
+ * Runs the Media Vision prepare step on the facial-landmark handle.
+ *
+ * @throws std::runtime_error if mv_facial_landmark_prepare() does not
+ *         report MEDIA_VISION_ERROR_NONE.
+ */
+void MvFaceLandmarkDetectionMulti::prepare()
+{
+	if (mv_facial_landmark_prepare(_handle) == MEDIA_VISION_ERROR_NONE)
+		return;
+
+	throw runtime_error("Fail to prepare face landmark detection.");
+}
+
+/**
+ * Selects the image that landmark inference should run on.
+ *
+ * When @a roi carries no rectangle, @a output becomes a copy of @a input.
+ * Otherwise the first rectangle of @a roi, clamped to the frame, is copied
+ * into a tightly packed buffer and @a output is set up to describe that
+ * buffer. Only roi._rects[0] is used; any further rectangles are ignored.
+ *
+ * @param input  Source frame; must actually be an ImageDataType.
+ * @param output Receives the description of the image to analyse. In the
+ *               cropped case output.ptr refers to per-thread scratch storage
+ *               and stays valid only until the next call on the same thread.
+ * @param roi    Result of a preceding task that provides face rectangles.
+ * @throws std::bad_cast if @a input is not an ImageDataType.
+ * @throws std::invalid_argument if the rectangle does not overlap the frame.
+ */
+void MvFaceLandmarkDetectionMulti::cropFaceRegion(BaseDataType& input, ImageDataType& output, BaseResultType& roi)
+{
+	ImageDataType& data = dynamic_cast<ImageDataType&>(input);
+
+	if (roi._rects.empty()) {
+		output = data;
+		return;
+	}
+
+	const auto& face = roi._rects[0];
+	const cv::Rect frameRect(0, 0, data.width, data.height);
+	// Intersect with the frame: a detection box that touches or exceeds the
+	// image border would otherwise trip OpenCV's ROI bounds assertion.
+	const cv::Rect faceRect =
+			cv::Rect(face.left, face.top, face.right - face.left, face.bottom - face.top) & frameRect;
+	if (faceRect.width <= 0 || faceRect.height <= 0) {
+		SINGLEO_LOGE("Face region is outside of the input image.");
+		throw invalid_argument("Invalid face region.");
+	}
+
+	cv::Mat cvData(cv::Size(data.width, data.height), CV_MAKETYPE(CV_8U, data.byte_per_pixel), data.ptr);
+
+	// A cv::Mat ROI is only a view that keeps the row stride of the full frame,
+	// while the consumer reads width * height * byte_per_pixel contiguous bytes.
+	// Copy the ROI into packed storage so the rows line up. The storage is
+	// thread-local scratch space because the result is consumed right after
+	// this call returns.
+	static thread_local cv::Mat packedFace;
+	cvData(faceRect).copyTo(packedFace);
+
+	output.pixel_format = data.pixel_format;
+	output.byte_per_pixel = data.byte_per_pixel;
+	output.width = packedFace.cols;
+	output.height = packedFace.rows;
+	output.ptr = packedFace.data;
+}
+
+/**
+ * Runs facial-landmark inference on @a input, restricted to the face region
+ * taken from @a auxiliary_input when one is available.
+ *
+ * The buffer is always handed to Media Vision as MEDIA_VISION_COLORSPACE_RGB888.
+ * A failure while filling the source or running inference is logged and not
+ * propagated to the caller.
+ *
+ * @param input           Frame to analyse; its _data_type must be DataType::IMAGE.
+ * @param auxiliary_input Result of a preceding task, passed on to cropFaceRegion().
+ * @param async           Not used by this implementation.
+ * @throws std::invalid_argument if @a input is not of type DataType::IMAGE.
+ */
+void MvFaceLandmarkDetectionMulti::invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async)
+{
+	SINGLEO_LOGD("Invoke FLD");
+
+	const bool isImage = (input._data_type == DataType::IMAGE);
+	if (!isImage) {
+		SINGLEO_LOGE("Invalid input type.");
+		throw invalid_argument("Input type not support.");
+	}
+
+	SINGLEO_LOGD("FLD casting");
+	ImageDataType face;
+	SINGLEO_LOGD("cropping..");
+	cropFaceRegion(input, face, auxiliary_input);
+
+	try {
+		if (mv_source_fill_by_buffer(_mv_src, face.ptr, face.width * face.height * face.byte_per_pixel, face.width,
+									 face.height, MEDIA_VISION_COLORSPACE_RGB888) != MEDIA_VISION_ERROR_NONE)
+			throw runtime_error("Fail to convert to mv source.");
+
+		if (mv_facial_landmark_inference(_handle, _mv_src) != MEDIA_VISION_ERROR_NONE)
+			throw runtime_error("Fail to invoke face landmark detection.");
+	} catch (std::runtime_error &failure) {
+		// Failures are reported through the log only.
+		SINGLEO_LOGE("%s", failure.what());
+	}
+}
+
+/**
+ * Collects the landmark positions of the latest inference into _output_data.
+ *
+ * The previous landmarks are discarded, the frame number reported by Media
+ * Vision is stored, and one Point (x and y only) is appended per result.
+ *
+ * @return Reference to the internal FldResultType; it is overwritten by the
+ *         next call to result().
+ * @throws std::runtime_error if the result count or a landmark position
+ *         cannot be read.
+ */
+BaseResultType &MvFaceLandmarkDetectionMulti::result()
+{
+	// Initialised so that nothing indeterminate is read should the API leave
+	// an out-parameter untouched.
+	unsigned long frame_number = 0;
+	unsigned int result_cnt = 0;
+
+	int ret = mv_facial_landmark_get_result_count(_handle, &frame_number, &result_cnt);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw runtime_error("Fail to get face landmark detection result count.");
+
+	_output_data._landmarks.clear();
+	_output_data._landmarks.reserve(result_cnt);
+	_output_data._frame_number = frame_number;
+
+	for (unsigned int idx = 0; idx < result_cnt; ++idx) {
+		Point landmark;
+
+		ret = mv_facial_landmark_get_position(_handle, idx, &landmark.x, &landmark.y);
+		if (ret != MEDIA_VISION_ERROR_NONE)
+			throw runtime_error("Fail to get face landmark position.");
+
+		_output_data._landmarks.push_back(landmark);
+		// idx, x and y are unsigned int, so %u is the matching conversion.
+		SINGLEO_LOGD("idx[%2u]: (%3u, %3u)", idx, landmark.x, landmark.y);
+	}
+
+	return _output_data;
+}
+
+}
+}
+}
throw runtime_error("Fail to prepare object detection.");
}
-void MvObjectDetection::invoke(BaseDataType &input, bool async, BaseResultType auxiliary_input)
+void MvObjectDetection::invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async)
{
ImageDataType &data = dynamic_cast<ImageDataType &>(input);
virtual void configure() = 0;
virtual void prepare() = 0;
- virtual void invoke(BaseDataType &input, bool async = false, BaseResultType auxiliary_input = BaseResultType(ResultType::NONE)) = 0;
+ virtual void invoke(BaseDataType &input, BaseResultType &auxiliary_input, bool async = false) = 0;
virtual BaseResultType &result() = 0;
};
{
namespace inference
{
-enum class TaskType { NONE, IMAGE_CLASSIFICATION, OBJECT_DETECTION, FACE_DETECTION };
+enum class TaskType { NONE, IMAGE_CLASSIFICATION, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK_DETECTION }; // FACE_LANDMARK_DETECTION is mapped to MvFaceLandmarkDetectionMulti where tasks are added
} // inference
} // singleo
void InferenceServiceDefault::invoke(BaseDataType &input, bool async)
{
- _task->invoke(input, async);
+	// Forwards the request to the single task of this service without any
+	// auxiliary data. The task interface takes the auxiliary input by non-const
+	// lvalue reference, which a temporary cannot bind to, so a named
+	// "no auxiliary data" object is passed instead.
+	BaseResultType no_auxiliary_input(ResultType::NONE);
+	_task->invoke(input, no_auxiliary_input, async);
}
BaseResultType &InferenceServiceDefault::result()
#include "InferenceServiceMulti.h"
#include "MvFaceDetectionMulti.h"
+#include "MvFaceLandmarkDetectionMulti.h"
#include "SingleoLog.h"
using namespace std;
switch (type) {
case TaskType::FACE_DETECTION:
_tasks.push_back(std::make_pair(type, make_unique<MvFaceDetectionMulti>()));
+ SINGLEO_LOGI("Add task %s", "FACE_DETECTION");
+ break;
+ case TaskType::FACE_LANDMARK_DETECTION:
+ _tasks.push_back(std::make_pair(type, make_unique<MvFaceLandmarkDetectionMulti>()));
+ SINGLEO_LOGI("Add task %s", "FACE_LANDMARK_DETECTION");
break;
}
}
{
BaseResultType result(ResultType::NONE);
for (auto task = _tasks.begin(); task!= _tasks.end(); ++task) {
+ task->second->invoke(input, result, async);
SINGLEO_LOGD("%d task",task->first);
- task->second->invoke(input, async, result);
if ((task+1)!= _tasks.end()) {
result = task->second->result();
}
std::unique_ptr<singleo::inference::IInferenceServiceInterface> _face_estimator;
std::unique_ptr<singleo::input::IInputService> _input_service;
- const std::vector<inference::TaskType> _tasks { inference::TaskType::FACE_DETECTION };
+ const std::vector<inference::TaskType> _tasks { inference::TaskType::FACE_DETECTION, inference::TaskType::FACE_LANDMARK_DETECTION };
public:
explicit GazeEstimator(input::InputConfigBase &config);
{
_face_estimator->invoke(input);
- auto &headPose = _face_estimator->result();
+ SINGLEO_LOGD("Invoke done");
+ auto &result = _face_estimator->result();
+ SINGLEO_LOGD("Result done");
+ if (!result._rects.empty())
+ SINGLEO_LOGD("ROI: %d, %d, %d,%d",
+ result._rects[0].top, result._rects[0].left, result._rects[0].bottom, result._rects[0].right);
- SINGLEO_LOGI("ROI: %d, %d, %d,%d",
- headPose._rects[0].top, headPose._rects[0].left, headPose._rects[0].bottom, headPose._rects[0].right);
+ auto &headPose = dynamic_cast<FldResultType&>(result);
+ SINGLEO_LOGD("Landmark: %zd, %zd",
+ headPose._landmarks[0].x, headPose._landmarks[0].y);
return PoseVector{-1, -1, -1};
}