--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FLD_U2NET_H__
+#define __FLD_U2NET_H__
+
+#include <memory>
+#include <mv_common.h>
+
+#include "LandmarkDetection.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
template<typename T> class FldU2net : public LandmarkDetection<T>
{
	// Pull dependent-base members into scope so they can be used without
	// "this->" qualification (required by two-phase name lookup).
	using LandmarkDetection<T>::_config;
	using LandmarkDetection<T>::_preprocess;
	using LandmarkDetection<T>::_inference;

private:
	unsigned int _numberOfLandmarks; // number of landmarks decoded per face (set by the ctor)
	LandmarkDetectionResult _result; // cached result of the most recent result() call

public:
	// Facial landmark detection task backed by a U2NET-style model
	// (presumably; inferred from the class name — confirm against model docs).
	FldU2net(LandmarkDetectionTaskType task_type, std::shared_ptr<Config> config);
	~FldU2net();

	// Decodes the inference output tensor into landmark coordinates on the
	// original source image and returns a reference to the internal result.
	LandmarkDetectionResult &result() override;
};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
*/
#include "FacialLandmarkAdapter.h"
+#include "FldU2net.h"
#include "MvMlException.h"
#include "mv_landmark_detection_config.h"
case LandmarkDetectionTaskType::FLD_TWEAK_CNN:
_landmark_detection = make_unique<FldTweakCnn<U> >(task_type, _config);
break;
+ case LandmarkDetectionTaskType::FLD_U2NET:
+ _landmark_detection = make_unique<FldU2net<U> >(task_type, _config);
+ break;
default:
throw InvalidOperation("Invalid landmark detection task type.");
}
if (model_name == "FLD_TWEAK_CNN")
return LandmarkDetectionTaskType::FLD_TWEAK_CNN;
+ else if (model_name == "FLD_U2NET")
+ return LandmarkDetectionTaskType::FLD_U2NET;
// TODO.
throw InvalidParameter("Invalid facial detection model name.");
--- /dev/null
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <map>
+#include <string.h>
+
+#include "FldU2net.h"
+#include "MvMlException.h"
+#include "Postprocess.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T>
+FldU2net<T>::FldU2net(LandmarkDetectionTaskType task_type, std::shared_ptr<Config> config)
+ : LandmarkDetection<T>(task_type, config), _result()
+{
+ _numberOfLandmarks = 68;
+}
+
+template<typename T> FldU2net<T>::~FldU2net()
+{}
+
+template<typename T> LandmarkDetectionResult &FldU2net<T>::result()
+{
+ // Clear _result object because result() function can be called every time user wants
+ // so make sure to clear existing result data before getting the data again.
+ _result = LandmarkDetectionResult();
+
+ vector<string> names;
+
+ LandmarkDetection<T>::getOutputNames(names);
+
+ auto scoreMetaInfo = _config->getOutputMetaMap().at(names[0]);
+ auto decodingLandmark =
+ static_pointer_cast<DecodingLandmark>(scoreMetaInfo->decodingTypeMap[DecodingType::LANDMARK]);
+
+ if (decodingLandmark->decoding_type != LandmarkDecodingType::BYPASS)
+ throw InvalidOperation("decoding type not support.");
+
+ if (decodingLandmark->coordinate_type != LandmarkCoordinateType::RATIO)
+ throw InvalidOperation("coordinate type not support.");
+
+ if (decodingLandmark->landmark_type != LandmarkType::SINGLE_2D)
+ throw InvalidOperation("landmark type not support.");
+
+ auto ori_src_width = static_cast<double>(_preprocess.getImageWidth()[0]);
+ auto ori_src_height = static_cast<double>(_preprocess.getImageHeight()[0]);
+ auto input_tensor_width = static_cast<double>(_inference->getInputWidth());
+ auto input_tensor_height = static_cast<double>(_inference->getInputHeight());
+
+ _result.number_of_landmarks = _numberOfLandmarks;
+
+ vector<float> score_tensor;
+
+ LandmarkDetection<T>::getOutputTensor(names[0], score_tensor);
+
+ // Calculate the ratio[A] between the original image size and the input tensor size.
+ auto width_ratio = ori_src_width / input_tensor_width;
+ auto height_ratio = ori_src_height / input_tensor_height;
+
+ // In case that landmark coordinate type is RATIO, output tensor buffer contains ratio values indicating
+ // the position of each landmark for the input tensor.
+ // Therefore, each landmark position for original image is as following,
+ // x = [width A] * width of input tensor * width ratio value of output tensor.
+ // y = [height A] * height of input tensor * height ratio value of output tensor.
+ for (unsigned int idx = 0; idx < _numberOfLandmarks; ++idx) {
+ _result.x_pos.push_back(
+ static_cast<unsigned int>(width_ratio * input_tensor_width * score_tensor[idx + idx * 1]));
+ _result.y_pos.push_back(
+ static_cast<unsigned int>(height_ratio * input_tensor_height * score_tensor[idx + idx * 1 + 1]));
+ }
+
+ return _result;
+}
+
+template class FldU2net<unsigned char>;
+template class FldU2net<float>;
+
+}
+}
\ No newline at end of file