From: Inki Dae Date: Tue, 29 Mar 2022 03:51:08 +0000 (+0900) Subject: mv_machine_learning: add data augmentation feature X-Git-Tag: submit/tizen/20220720.053259~50 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9839c1484bf360f1f863f3397f8f81aea28c3d1f;p=platform%2Fcore%2Fapi%2Fmediavision.git mv_machine_learning: add data augmentation feature [Version] 0.18.0-0 [Issue type] new feature Added a data augmentation feature which generates several input images preprocessed in various ways to extend a given input data to multiple ones. With this feature, the accuracy is slightly improved, so this patch also corrects a wrong expected answer in a test case. As of now, this feature supports two preprocessing classes - horizontal flip and rotation, and it uses the default and flip classes by default. Change-Id: I0e18e761c020ffaa8e4cca660f631413c3d5c69c Signed-off-by: Inki Dae --- diff --git a/mv_machine_learning/face_recognition/include/face_recognition.h b/mv_machine_learning/face_recognition/include/face_recognition.h index ec7fb2c4..69524785 100644 --- a/mv_machine_learning/face_recognition/include/face_recognition.h +++ b/mv_machine_learning/face_recognition/include/face_recognition.h @@ -24,6 +24,9 @@ #include "label_manager.h" #include "face_net_info.h" #include "simple_shot.h" +#include "data_augment_default.h" +#include "data_augment_flip.h" +#include "data_augment_rotate.h" typedef struct { std::string backbone_backend_name; @@ -39,6 +42,7 @@ typedef struct { class FaceRecognition { private: FaceRecognitionConfig _config; + std::vector> _data_augments; void CheckFeatureVectorFile(std::unique_ptr& old_fvm, std::unique_ptr& new_fvm); std::unique_ptr CreateDSM(const training_engine_backend_type_e backend_type); @@ -71,6 +75,7 @@ public: std::unique_ptr& GetInternal() { return _internal; } std::unique_ptr& GetBackbone() { return _backbone; } std::vector& GetBackboneInputLayerInfo() { return _face_net_info->GetInputLayerInfo(); } + std::vector>& GetDataAugment() { 
return _data_augments; } }; #endif \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/src/face_recognition.cpp b/mv_machine_learning/face_recognition/src/face_recognition.cpp index d7b461fa..c412e2b6 100644 --- a/mv_machine_learning/face_recognition/src/face_recognition.cpp +++ b/mv_machine_learning/face_recognition/src/face_recognition.cpp @@ -40,7 +40,9 @@ using namespace Mediavision::MachineLearning::Exception; FaceRecognition::FaceRecognition() : _initialized(true), _prepared(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager() { - + _data_augments.push_back(std::make_shared()); + _data_augments.push_back(std::make_shared()); + /* Add other data argument classes. */ } FaceRecognition::~FaceRecognition() diff --git a/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp b/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp index 222fa5fb..537e8e9d 100644 --- a/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp +++ b/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp @@ -16,13 +16,16 @@ #include #include +#include #include "face_recognition.h" #include "feature_vector_manager.h" #include "backbone_model_info.h" #include "mv_face_recognition_open.h" +#include "machine_learning_exception.h" using namespace std; +using namespace Mediavision::MachineLearning::Exception; int mv_face_recognition_create_open(mv_face_recognition_h *handle) { @@ -142,12 +145,6 @@ int mv_face_recognition_prepare_open(mv_face_recognition_h handle) return ret; } - FaceRecognition *pFace = static_cast(handle); - - ret = pFace->Prepare(); - if (ret != MEDIA_VISION_ERROR_NONE) - LOGE("Fail to prepare face recognition."); - LOGD("LEAVE"); return ret; @@ -162,7 +159,6 @@ int mv_face_recognition_register_open(mv_face_recognition_h handle, mv_source_h return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - FaceRecognition *pFace = static_cast(handle); mv_colorspace_e 
colorspace = MEDIA_VISION_COLORSPACE_INVALID; unsigned int width = 0, height = 0, bufferSize = 0; unsigned char *buffer = NULL; @@ -180,19 +176,40 @@ int mv_face_recognition_register_open(mv_face_recognition_h handle, mv_source_h return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; } - vector src_vec; - vector& input_layer_info = pFace->GetBackboneInputLayerInfo(); - // TODO. consider mutiple tensor info. - unsigned int re_width = input_layer_info[0].tensor_info.shape[0]; - unsigned int re_height = input_layer_info[0].tensor_info.shape[1]; - - LOGD("Convert mv source(WxH) : %d x %d => %d x %d", width, height, re_width, re_height); - - FeatureVectorManager::GetVecFromRGB(buffer, src_vec, width, height, re_width, re_height); + FaceRecognition *pFace = static_cast(handle); - int ret = pFace->RegisterNewFace(src_vec, string(label)); - if (ret != MEDIA_VISION_ERROR_NONE) - LOGE("Fail to register new face."); + auto data_augments = pFace->GetDataAugment(); + int ret = MEDIA_VISION_ERROR_NONE; + + for (auto& data_augment : data_augments) { + ret = pFace->Prepare(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to prepare face recognition."); + break; + } + + vector& input_layer_info = pFace->GetBackboneInputLayerInfo(); + // TODO. consider mutiple tensor info. 
+ unsigned int re_width = input_layer_info[0].tensor_info.shape[0]; + unsigned int re_height = input_layer_info[0].tensor_info.shape[1]; + vector src_vec; + + try { + data_augment->Preprocess(buffer, src_vec, width, height, re_width, re_height); + } catch (InvalidParameter& e) { + LOGE("%s", e.what()); + ret = e.getError(); + break; + } + + ret = pFace->RegisterNewFace(src_vec, string(label)); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to register new face."); + break; + } + + src_vec.clear(); + } LOGD("LEAVE"); diff --git a/mv_machine_learning/training/include/data_augment.h b/mv_machine_learning/training/include/data_augment.h new file mode 100644 index 00000000..b941f5e0 --- /dev/null +++ b/mv_machine_learning/training/include/data_augment.h @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DATA_AUGMENT_H__ +#define __DATA_AUGMENT_H__ + +#include +#include + +#include +#include + +enum { + DATA_AUGMENT_FLIP = 0, + DATA_AUGMENT_ROTATE, +}; + +class DataAugment { +protected: + unsigned int _type; + unsigned int _degree; + +public: + DataAugment(); + DataAugment(unsigned int degree); + virtual ~DataAugment(); + + void Resize(cv::Mat& src, std::vector& vec, int width, int height); + virtual void Preprocess(unsigned char *in_data, std::vector& out_vec, + int width, int height, int re_width, int re_height); +}; + +#endif \ No newline at end of file diff --git a/mv_machine_learning/training/include/data_augment_default.h b/mv_machine_learning/training/include/data_augment_default.h new file mode 100644 index 00000000..04e3c179 --- /dev/null +++ b/mv_machine_learning/training/include/data_augment_default.h @@ -0,0 +1,34 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DATA_AUGMENT_DEFAULT_H__ +#define __DATA_AUGMENT_DEFAULT_H__ + +#include +#include + +#include "data_augment.h" + +class DataAugmentDefault : public DataAugment { +public: + DataAugmentDefault(); + ~DataAugmentDefault(); + + void Preprocess(unsigned char *in_data, std::vector& out_vec, + int width, int height, int re_width, int re_height) final; +}; + +#endif \ No newline at end of file diff --git a/mv_machine_learning/training/include/data_augment_flip.h b/mv_machine_learning/training/include/data_augment_flip.h new file mode 100644 index 00000000..69c115eb --- /dev/null +++ b/mv_machine_learning/training/include/data_augment_flip.h @@ -0,0 +1,34 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DATA_AUGMENT_FLIP_H__ +#define __DATA_AUGMENT_FLIP_H__ + +#include +#include + +#include "data_augment.h" + +class DataAugmentFlip : public DataAugment { +public: + DataAugmentFlip(); + ~DataAugmentFlip(); + + void Preprocess(unsigned char *in_data, std::vector& out_vec, + int width, int height, int re_width, int re_height) final; +}; + +#endif \ No newline at end of file diff --git a/mv_machine_learning/training/include/data_augment_rotate.h b/mv_machine_learning/training/include/data_augment_rotate.h new file mode 100644 index 00000000..428bb45e --- /dev/null +++ b/mv_machine_learning/training/include/data_augment_rotate.h @@ -0,0 +1,34 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DATA_AUGMENT_ROTATE_H__ +#define __DATA_AUGMENT_ROTATE_H__ + +#include +#include + +#include "data_augment.h" + +class DataAugmentRotate : public DataAugment { +public: + DataAugmentRotate(unsigned int degree); + ~DataAugmentRotate(); + + void Preprocess(unsigned char *in_data, std::vector& out_vec, + int width, int height, int re_width, int re_height) final; +}; + +#endif \ No newline at end of file diff --git a/mv_machine_learning/training/src/data_augment.cpp b/mv_machine_learning/training/src/data_augment.cpp new file mode 100644 index 00000000..09bc734a --- /dev/null +++ b/mv_machine_learning/training/src/data_augment.cpp @@ -0,0 +1,59 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "data_augment.h" + +using namespace std; + +DataAugment::DataAugment() : _type(), _degree() +{ + +} + +DataAugment::DataAugment(unsigned int degree) : _type(), _degree(degree) +{ + +} + +DataAugment::~DataAugment() +{ + +} + +void DataAugment::Resize(cv::Mat& src, vector& out_vec, int width, int height) +{ + cv::Mat resized; + + resize(src, resized, cv::Size(width, height), 0, 0, cv::INTER_CUBIC); + + cv::Mat floatSrc; + + resized.convertTo(floatSrc, CV_32FC3); + + cv::Mat meaned = cv::Mat(floatSrc.size(), CV_32FC3, cv::Scalar(127.5f, 127.5f, 127.5f)); + cv::Mat dst; + + cv::subtract(floatSrc, meaned, dst); + dst /= 127.5f; + + out_vec.assign((float *)dst.data, (float *)dst.data + dst.total() * dst.channels()); +} + +void DataAugment::Preprocess(unsigned char *in_data, std::vector& out_vec, + int width, int height, int re_width, int re_height) +{ + return Preprocess(in_data, out_vec, width, height, re_width, re_height); +} \ No newline at end of file diff --git a/mv_machine_learning/training/src/data_augment_default.cpp b/mv_machine_learning/training/src/data_augment_default.cpp new file mode 100644 index 00000000..dffacc59 --- /dev/null +++ b/mv_machine_learning/training/src/data_augment_default.cpp @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "data_augment_default.h" + +using namespace std; + +DataAugmentDefault::DataAugmentDefault() : DataAugment() +{ + +} + +DataAugmentDefault::~DataAugmentDefault() +{ + +} + +void DataAugmentDefault::Preprocess(unsigned char *in_data, vector& out_vec, + int width, int height, int re_width, int re_height) +{ + cv::Mat cvSrc = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), in_data).clone(); + + Resize(cvSrc, out_vec, re_width, re_height); +} \ No newline at end of file diff --git a/mv_machine_learning/training/src/data_augment_flip.cpp b/mv_machine_learning/training/src/data_augment_flip.cpp new file mode 100644 index 00000000..648363bf --- /dev/null +++ b/mv_machine_learning/training/src/data_augment_flip.cpp @@ -0,0 +1,41 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "data_augment_flip.h" + +using namespace std; + +DataAugmentFlip::DataAugmentFlip() : DataAugment() +{ + +} + +DataAugmentFlip::~DataAugmentFlip() +{ + +} + +void DataAugmentFlip::Preprocess(unsigned char *in_data, vector& out_vec, + int width, int height, int re_width, int re_height) +{ + cv::Mat cvSrc = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), in_data).clone(); + + cv::Mat cvFlip; + + cv::flip(cvSrc, cvFlip, 1); + + Resize(cvFlip, out_vec, re_width, re_height); +} \ No newline at end of file diff --git a/mv_machine_learning/training/src/data_augment_rotate.cpp b/mv_machine_learning/training/src/data_augment_rotate.cpp new file mode 100644 index 00000000..9905b483 --- /dev/null +++ b/mv_machine_learning/training/src/data_augment_rotate.cpp @@ -0,0 +1,59 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "machine_learning_exception.h" +#include "data_augment_rotate.h" + +using namespace std; +using namespace Mediavision::MachineLearning::Exception; + +DataAugmentRotate::DataAugmentRotate(unsigned int degree) : DataAugment(degree) +{ + +} + +DataAugmentRotate::~DataAugmentRotate() +{ + +} + +void DataAugmentRotate::Preprocess(unsigned char *in_data, vector& out_vec, + int width, int height, int re_width, int re_height) +{ + cv::Mat cvSrc = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), in_data).clone(); + + cv::Mat cvRotate; + int rotate_code = 0; + + switch (_degree) { + case 90: + rotate_code = cv::ROTATE_90_CLOCKWISE; + break; + case -90: + case 270: + rotate_code = cv::ROTATE_90_COUNTERCLOCKWISE; + break; + case 180: + rotate_code = cv::ROTATE_180; + break; + default: + throw InvalidParameter("Invalid degree value."); + } + + cv::rotate(cvSrc, cvRotate, rotate_code); + + Resize(cvRotate, out_vec, re_width, re_height); +} diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 3fa33c7b..ce52cb44 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.17.3 +Version: 0.18.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/face_recognition/test_face_recognition.cpp b/test/testsuites/machine_learning/face_recognition/test_face_recognition.cpp index c3c83922..5e465448 100644 --- a/test/testsuites/machine_learning/face_recognition/test_face_recognition.cpp +++ b/test/testsuites/machine_learning/face_recognition/test_face_recognition.cpp @@ -133,7 +133,7 @@ TEST(FaceRecognitionTest, FaceRecognitionClassWithEachLabelRemovalShouldBeOk) { "7779", "7779", "2929", "2929", "7779", "2929", "7779", "2929", "2929", "7779", "2929", "7779", "7779", "7779", "7779" }, - { "7779", "3448", "none", "none", "3448", + { "3448", 
"3448", "none", "none", "3448", "3448", "7779", "none", "none", "3448", "none", "7779", "7779", "7779", "7779" }, { "3448", "3448", "2929", "2929", "3448",