Add face recognition task node support to Autozoom service.
This patch adds a face recognition task for Mediavision and then adds the
face recognition task node to the graph pipeline of the Autozoom service.
It also supports changing the behavior of the graph pipeline at runtime.
That is, the graph pipeline of the Autozoom service is as follows:

input --- face detection --- bridge --- face recognition --- endpoint
               |                                                 |
               ---------------------------------------------------
In this case, we have to consider two cases:
- if the face detection node fails, the bridge node should not invoke the
  face recognition node.
- if the face detection node succeeds, the bridge node should invoke the
  face recognition node.
This means that the bridge node has to decide at runtime whether the next
sub graph pipeline should be invoked. This patch makes that behavior
possible, as sketched below.
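
For illustration, the runtime decision can be sketched roughly as follows
(a simplified sketch of the TaskManager change in this patch, not the
verbatim implementation; spawnThreadFor() is a hypothetical placeholder
for the real thread creation):

    // For each next node 'n', a bridge node that produced no output buffer
    // (i.e., face detection failed) skips its dependent sub graph pipeline
    // and only wakes 'n' up so that nodes waiting on it are released.
    for (auto &n : node->getNexts()) {
        if (node->getType() == NodeType::BRIDGE &&
            !std::dynamic_pointer_cast<BridgeNode>(node)->isEnabled()) {
            n->wakeup();
            continue;
        }
        spawnThreadFor(n);
    }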
P.S. Face recognition needs a pre-trained model, so install the model file
after training the model using the face recognition training API, roughly
as in the sketch below. Otherwise, the face recognition result will always
be 'none'.
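
A minimal training sketch, assuming the Mediavision face recognition
training API (error handling is elided, and the exact function set may
differ by platform version):

    #include <mv_face_recognition.h>

    mv_face_recognition_h handle;
    mv_face_recognition_create(&handle);
    mv_face_recognition_prepare(handle);

    mv_source_h source;
    mv_create_source(&source);
    // ... fill 'source' with a face image, e.g. via mv_source_fill_by_buffer() ...

    // Register a labeled face; this trains the internal model so that
    // inference can later return the label instead of 'none'.
    mv_face_recognition_register(handle, source, "alice");

    mv_destroy_source(source);
    mv_face_recognition_destroy(handle);
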
Change-Id: I0adfd553285b2e22f551e38fc95a406a0fd9acd5
Signed-off-by: Inki Dae <inki.dae@samsung.com>
}
};
-enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION };
+enum class ResultType { NONE, OBJECT_DETECTION, FACE_DETECTION, FACE_LANDMARK, IMAGE_CLASSIFICATION, FACE_RECOGNITION };
struct BaseResultType {
ResultType _type { ResultType::NONE };
}
};
+struct FrResultType : public BaseResultType {
+ FrResultType() : BaseResultType(ResultType::FACE_RECOGNITION)
+ {}
+
+ std::string _label;
+
+ std::shared_ptr<BaseResultType> clone() override
+ {
+ return std::make_shared<FrResultType>(*this);
+ }
+};
+
enum class ServiceType { NONE, AUTO_ZOOM };
enum class InputFeedType { NONE, CAMERA, SCREEN_CAPTURE };
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceLandmark.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvObjectDetection.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvImageClassification.cpp
+ ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvFaceRecognition.cpp
${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/src/MvInferenceTaskFactory.cpp
)
-LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification)
+LIST(APPEND INFERENCE_LIBRARY_LIST ${INFERENCE_LIBRARY_LIST} mv_common mv_inference mv_object_detection mv_landmark_detection mv_image_classification mv_face_recognition)
LIST(APPEND INFERENCE_HEADER_LIST ${INFERENCE_HEADER_LIST} ${INFERENCE_MEDIAVISION_BACKEND_DIRECTORY}/include /usr/include/media)
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MV_FACE_RECOGNITION_H__
+#define __MV_FACE_RECOGNITION_H__
+
+#include "IInferenceTaskInterface.h"
+#include "mv_face_recognition.h"
+#include "SingleoCommonTypes.h"
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+class MvFaceRecognition : public IInferenceTaskInterface
+{
+private:
+ mv_face_recognition_h _handle {};
+ FrResultType _output_data {};
+
+public:
+ MvFaceRecognition();
+ virtual ~MvFaceRecognition();
+
+ void configure() override;
+ void prepare() override;
+ void invoke(BaseDataType &input, bool async) override;
+ BaseResultType &result() override;
+};
+
+} // backends
+} // inference
+} // singleo
+
+#endif
std::unique_ptr<IInferenceTaskInterface> createObjectDetection() override;
std::unique_ptr<IInferenceTaskInterface> createFaceDetection() override;
std::unique_ptr<IInferenceTaskInterface> createFaceLandmarkDetection() override;
+ std::unique_ptr<IInferenceTaskInterface> createFaceRecognition() override;
};
}
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+#include "MvFaceRecognition.h"
+#include "SingleoLog.h"
+
+using namespace std;
+
+namespace singleo
+{
+namespace inference
+{
+namespace backends
+{
+MvFaceRecognition::MvFaceRecognition()
+{
+ int ret = mv_face_recognition_create(&_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to create face recognition handle.");
+}
+
+MvFaceRecognition::~MvFaceRecognition()
+{
+ mv_face_recognition_destroy(_handle);
+}
+
+void MvFaceRecognition::configure()
+{
+ // Nothing to do.
+}
+
+void MvFaceRecognition::prepare()
+{
+ int ret = mv_face_recognition_prepare(_handle);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to prepare face recognition.");
+}
+
+void MvFaceRecognition::invoke(BaseDataType &input, bool async)
+{
+ if (input._data_type != DataType::IMAGE) {
+ SINGLEO_LOGE("Invalid input type.");
+ throw invalid_argument("Input type not supported.");
+ }
+
+ ImageDataType &data = dynamic_cast<ImageDataType &>(input);
+
+ mv_source_h mv_src;
+
+ int ret = mv_create_source(&mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to create mv source.");
+
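+ // On failure, log and fall through so that the mv source below is always destroyed.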
+ try {
+ ret = mv_source_fill_by_buffer(mv_src, data.ptr, data.width * data.height * data.byte_per_pixel, data.width,
+ data.height, MEDIA_VISION_COLORSPACE_RGB888);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to convert to mv source.");
+
+ ret = mv_face_recognition_inference(_handle, mv_src);
+ if (ret != MEDIA_VISION_ERROR_NO_DATA && ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to invoke face recognition.");
+ } catch (std::runtime_error &e) {
+ SINGLEO_LOGE("%s", e.what());
+ }
+
+ ret = mv_destroy_source(mv_src);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to destroy mv source.");
+}
+
+BaseResultType &MvFaceRecognition::result()
+{
+ const char *out_label {};
+
+ int ret = mv_face_recognition_get_label(_handle, &out_label);
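+ // NO_DATA means there is no trained model or no recognized face yet.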
+ if (ret == MEDIA_VISION_ERROR_NO_DATA) {
+ _output_data._label = "none";
+ return _output_data;
+ }
+
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw runtime_error("Fail to get label.");
+
+ _output_data._label = out_label;
+
+ return _output_data;
+}
+
+} // backends
+} // inference
+} // singleo
#include "MvInferenceTaskFactory.h"
#include "MvFaceDetection.h"
#include "MvFaceLandmark.h"
+#include "MvFaceRecognition.h"
#include "MvObjectDetection.h"
#include "MvImageClassification.h"
#include "SingleoLog.h"
return make_unique<MvFaceLandmark>();
}
+std::unique_ptr<IInferenceTaskInterface> MvInferenceTaskFactory::createFaceRecognition()
+{
+ return make_unique<MvFaceRecognition>();
+}
+
}
}
virtual std::unique_ptr<IInferenceTaskInterface> createObjectDetection() = 0;
virtual std::unique_ptr<IInferenceTaskInterface> createFaceDetection() = 0;
virtual std::unique_ptr<IInferenceTaskInterface> createFaceLandmarkDetection() = 0;
+ virtual std::unique_ptr<IInferenceTaskInterface> createFaceRecognition() = 0;
};
}
#include "InputTypes.h"
#include "Postprocessor.h"
#include "InferenceNode.h"
+#include "BridgeNode.h"
#include "EndpointNode.h"
using namespace std;
{
REGISTER_SERVICE(AutoZoom)
+void BridgeNodeCallback(INode *node)
+{
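+ // Crop each detected face from the input image and forward the crops to the next sub graph pipeline.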
+ auto callbackNode = dynamic_cast<CallbackNode *>(node);
+ auto &inputBuffer = callbackNode->getInputBuffer();
+ auto imageData = dynamic_pointer_cast<ImageDataType>(inputBuffer->getInputs()[0]);
+
+ cv::Mat cv_image(cv::Size(imageData->width, imageData->height), CV_MAKETYPE(CV_8U, 3), imageData->ptr);
+ auto outputBuffer = make_shared<SharedBuffer>();
+
+ auto &results = callbackNode->results();
+ for (auto &r : results) {
+ if (r->_type != ResultType::FACE_DETECTION)
+ continue;
+
+ auto f_r = dynamic_pointer_cast<FdResultType>(r);
+
+ for (auto rect : f_r->_rects) {
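+ // Skip rects whose ROI does not fit within the image.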
+ if (rect.left < 0 || rect.top < 0 || rect.right > static_cast<int>(imageData->width) ||
+ rect.bottom > static_cast<int>(imageData->height))
+ continue;
+
+ cv::Rect roi(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top);
+ cv::Mat roi_img = cv_image(roi);
+ ImageDataType faceImage;
+
+ faceImage.width = roi_img.cols;
+ faceImage.height = roi_img.rows;
+ faceImage.byte_per_pixel = roi_img.channels();
+ faceImage.pixel_format = ImagePixelFormat::RGB888;
+ faceImage.is_owned = true;
+
+ size_t faceSize = roi_img.cols * roi_img.rows * roi_img.channels();
+
+ faceImage.ptr = new unsigned char[faceSize];
+ memcpy(faceImage.ptr, roi_img.data, faceSize);
+
+ outputBuffer->addInput(make_shared<ImageDataType>(faceImage));
+ }
+ }
+
+ // Face detection failed, so leave the output buffer unset and do not go forward.
+ if (outputBuffer->getInputs().size() == 0)
+ return;
+
+ callbackNode->setOutputBuffer(outputBuffer);
+}
+
AutoZoom::AutoZoom()
{
// In default, we will use Inference service factory for Mediavision to use Mediavision framework
// for inference service. TODO. introduce meta config file approach later.
auto factory = InferenceTaskFactory::instance().create("MvInferenceTaskFactory");
+ _taskManager = make_unique<TaskManager>();
+
auto face_detection_node = make_shared<InferenceNode>("face_detection");
face_detection_node->setInferenceTask(factory->createFaceDetection());
- _taskManager = make_unique<TaskManager>();
_taskManager->addNode(face_detection_node);
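+
+ // The bridge node crops detected faces and decides at runtime whether the
+ // face recognition sub graph pipeline should be invoked.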
+ auto bridge_node = make_shared<BridgeNode>("bridge_node");
+
+ bridge_node->addDependency(face_detection_node);
+ bridge_node->setCb(BridgeNodeCallback);
+ _taskManager->addNode(bridge_node);
+
+ auto face_recognition_node = make_shared<InferenceNode>("face_recognition_node");
+
+ face_recognition_node->setInferenceTask(factory->createFaceRecognition());
+ face_recognition_node->addDependency(bridge_node);
+ _taskManager->addNode(face_recognition_node);
+
auto endpoint_node = make_shared<EndpointNode>();
- endpoint_node->setCb(nullptr);
endpoint_node->addDependency(face_detection_node);
+ endpoint_node->addDependency(face_recognition_node);
_taskManager->addNode(endpoint_node);
_postprocessor = make_unique<Postprocessor>();
{
if (_async_mode)
_input_service->streamOff();
+
+ _taskManager->clear();
}
void AutoZoom::inputFeedCb(BaseDataType &data)
void AutoZoom::updateResult(BaseDataType &in_data)
{
- auto &output_data = _taskManager->output();
+ AutoZoomResult autozoom_result;
- if (output_data[0]->_type != ResultType::OBJECT_DETECTION && output_data[0]->_type != ResultType::FACE_DETECTION)
- throw InvalidParameter("Invalid result type");
+ for (auto &output : _taskManager->output()) {
+ if (output->_type != ResultType::OBJECT_DETECTION && output->_type != ResultType::FACE_DETECTION &&
+ output->_type != ResultType::FACE_RECOGNITION)
+ throw InvalidParameter("Invalid result type");
- vector<Rect> &rects = dynamic_cast<FdResultType &>(*output_data[0])._rects;
- AutoZoomResult autozoom_result;
+ if (output->_type == ResultType::OBJECT_DETECTION || output->_type == ResultType::FACE_DETECTION) {
+ vector<Rect> &rects = dynamic_cast<FdResultType &>(*output)._rects;
- autozoom_result.frame_number = output_data[0]->_frame_number;
- autozoom_result.num_of_objects = rects.size();
+ autozoom_result.frame_number = output->_frame_number;
+ autozoom_result.num_of_objects = rects.size();
- for (size_t idx = 0; idx < rects.size(); ++idx) {
- SINGLEO_LOGD("%dx%d ~ %dx%d", rects[idx].left, rects[idx].top, rects[idx].right, rects[idx].bottom);
+ for (size_t idx = 0; idx < rects.size(); ++idx) {
+ SINGLEO_LOGD("%dx%d ~ %dx%d", rects[idx].left, rects[idx].top, rects[idx].right, rects[idx].bottom);
- if (idx == 0) {
- autozoom_result.merged_rect = rects[idx];
- continue;
- }
- autozoom_result.merged_rect.left = min(rects[idx].left, autozoom_result.merged_rect.left);
- autozoom_result.merged_rect.top = min(rects[idx].top, autozoom_result.merged_rect.top);
- autozoom_result.merged_rect.right = max(rects[idx].right, autozoom_result.merged_rect.right);
- autozoom_result.merged_rect.bottom = max(rects[idx].bottom, autozoom_result.merged_rect.bottom);
- }
+ if (idx == 0) {
+ autozoom_result.merged_rect = rects[idx];
+ continue;
+ }
- SINGLEO_LOGD("detected object count = %zu", autozoom_result.num_of_objects);
+ autozoom_result.merged_rect.left = min(rects[idx].left, autozoom_result.merged_rect.left);
+ autozoom_result.merged_rect.top = min(rects[idx].top, autozoom_result.merged_rect.top);
+ autozoom_result.merged_rect.right = max(rects[idx].right, autozoom_result.merged_rect.right);
+ autozoom_result.merged_rect.bottom = max(rects[idx].bottom, autozoom_result.merged_rect.bottom);
+ }
- if (autozoom_result.num_of_objects == 0) {
- _result = autozoom_result;
- SINGLEO_LOGW("No detected objects.");
- return;
+ SINGLEO_LOGD("detected object count = %zu", autozoom_result.num_of_objects);
+
+ if (autozoom_result.num_of_objects == 0) {
+ _result = autozoom_result;
+ SINGLEO_LOGW("No detected objects.");
+ return;
+ }
+
+ } else if (output->_type == ResultType::FACE_RECOGNITION) {
+ auto &frResult = dynamic_cast<FrResultType &>(*output);
+ SINGLEO_LOGD("label : %s", frResult._label.c_str());
+ }
}
if (_async_mode)
{
class BridgeNode : public CallbackNode
{
+private:
+ bool _enabled { false };
+
public:
BridgeNode(std::string name = "bridge")
{
_cb(this);
_inputBuffer->release();
+ _enabled = false;
+
+ // The bridge node got a result from the previous task node, so enable this bridge node.
+ if (_outputBuffer)
+ _enabled = true;
+ }
+
+ bool isEnabled()
+ {
+ return _enabled;
}
};
{
return _name;
}
- void setInputBuffer(std::shared_ptr<SharedBuffer> inputBuffer) override;
+ void setInputBuffer(std::shared_ptr<SharedBuffer> &inputBuffer) override;
std::shared_ptr<SharedBuffer> &getInputBuffer() override;
void addDependency(std::shared_ptr<INode> node) override;
std::vector<std::shared_ptr<INode> > &getDependencies() override;
void wakeup() override;
virtual void configure() = 0;
virtual void invoke() = 0;
+ void clear() override;
void setCb(const NodeCb &cb);
std::vector<std::shared_ptr<BaseResultType> > &results() override;
};
virtual NodeType getType() = 0;
virtual std::string &getName() = 0;
- virtual void setInputBuffer(std::shared_ptr<SharedBuffer> inputBuffer) = 0;
+ virtual void setInputBuffer(std::shared_ptr<SharedBuffer> &inputBuffer) = 0;
virtual std::shared_ptr<SharedBuffer> &getInputBuffer() = 0;
virtual void addDependency(std::shared_ptr<INode> node) = 0;
virtual void addNext(std::shared_ptr<INode> node) = 0;
virtual std::vector<std::shared_ptr<BaseResultType> > &results() = 0;
virtual void wait() = 0;
virtual void wakeup() = 0;
+ virtual void clear() = 0;
};
using NodeCb = std::function<void(INode *node)>;
{
return _name;
}
- void setInputBuffer(std::shared_ptr<SharedBuffer> inputBuffer) override;
+ void setInputBuffer(std::shared_ptr<SharedBuffer> &inputBuffer) override;
std::shared_ptr<SharedBuffer> &getInputBuffer() override;
void addDependency(std::shared_ptr<INode> node) override;
std::vector<std::shared_ptr<INode> > &getDependencies() override;
void wakeup() override;
virtual void configure() = 0;
virtual void invoke() = 0;
+ void clear() override;
virtual std::vector<std::shared_ptr<BaseResultType> > &results() = 0;
};
return _type;
}
-void CallbackNode::setInputBuffer(shared_ptr<SharedBuffer> inputBuffer)
+void CallbackNode::setInputBuffer(std::shared_ptr<SharedBuffer> &inputBuffer)
{
inputBuffer->addRef();
_inputBuffer = inputBuffer;
}
-shared_ptr<SharedBuffer> &CallbackNode::getInputBuffer()
+std::shared_ptr<SharedBuffer> &CallbackNode::getInputBuffer()
{
return _inputBuffer;
}
_event.notify_all();
}
+void CallbackNode::clear()
+{
+ _inputBuffer = nullptr;
+ _outputBuffer = nullptr;
+}
+
}
}
\ No newline at end of file
{
void TaskManager::threadCb(shared_ptr<INode> &node)
{
- SINGLEO_LOGD("Launched node name = %s", node->getName().c_str());
-
// Wait until all nodes added to this node as dependency are completed
for (auto &d : node->getDependencies())
d->wait();
+ SINGLEO_LOGD("Launched node name = %s", node->getName().c_str());
+
if (node->getType() == NodeType::INFERENCE) {
if (_inputs[0]->_data_type != DataType::IMAGE) {
SINGLEO_LOGE("Invalid input data type.");
// Spawn threads for next nodes
for (auto &n : node->getNexts()) {
+ if (node->getType() == NodeType::BRIDGE) {
+ auto b_node = dynamic_pointer_cast<BridgeNode>(node);
+
+ // In case of a BRIDGE node, isEnabled() is false if the bridge node didn't get a result
+ // from the previous task node. In that case, skip the sub graph pipeline connected to
+ // this node and only wake the next node up so that its waiters are released.
+ if (!b_node->isEnabled()) {
+ n->wakeup();
+ continue;
+ }
+ }
+
std::lock_guard<std::mutex> lock(_thread_mutex);
if (_is_thread_created.find(n) == _is_thread_created.end()) {
_threads.push(make_shared<thread>(&TaskManager::threadCb, this, std::ref(n)));
t->join();
}
+
_is_thread_created.clear();
_inputs.clear();
+
+ // The result has been returned to the user, so clear each node.
+ // Note: clear() of each node should be called only after the graph pipeline
+ // has completed because a SharedBuffer can be shared between nodes.
+ for (const auto &node : _nodes)
+ node->clear();
}
vector<shared_ptr<BaseResultType> > &TaskManager::output()
return _type;
}
-void TaskNode::setInputBuffer(shared_ptr<SharedBuffer> inputBuffer)
+void TaskNode::setInputBuffer(std::shared_ptr<SharedBuffer> &inputBuffer)
{
inputBuffer->addRef();
_inputBuffer = inputBuffer;
_event.notify_all();
}
+void TaskNode::clear()
+{
+ _inputBuffer = nullptr;
+ _outputBuffer = nullptr;
+}
+
}
}
\ No newline at end of file