From: Inki Dae Date: Mon, 12 Dec 2022 09:37:29 +0000 (+0900) Subject: mv_machine_learning: code refactoring to face recognition X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Fsandbox%2Finki.dae%2Fface_recognition_refactoring;p=platform%2Fcore%2Fapi%2Fmediavision.git mv_machine_learning: code refactoring to face recognition Change-Id: Ia38c4dc9c45b3216d88acf5f80befd2c02da2aad Signed-off-by: Inki Dae --- diff --git a/mv_machine_learning/face_recognition/CMakeLists.txt b/mv_machine_learning/face_recognition/CMakeLists.txt index c51d75d0..6059e4f2 100644 --- a/mv_machine_learning/face_recognition/CMakeLists.txt +++ b/mv_machine_learning/face_recognition/CMakeLists.txt @@ -2,7 +2,7 @@ project(${MV_FACE_RECOG_LIB_NAME}) cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common training-engine-interface-common) -file(GLOB MV_FACE_RECOG_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") +file(GLOB MV_FACE_RECOG_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/../meta/src/*.cpp") find_package(OpenCV REQUIRED dnn imgproc) if(NOT OpenCV_FOUND) diff --git a/mv_machine_learning/face_recognition/include/face_net_info.h b/mv_machine_learning/face_recognition/include/face_net_info.h deleted file mode 100644 index 8e547046..00000000 --- a/mv_machine_learning/face_recognition/include/face_net_info.h +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __FACE_NET_INFO_H__ -#define __FACE_NET_INFO_H__ - -#include "backbone_model_info.h" - -class FaceNetInfo : public IBackboneModelInfo -{ -private: - std::vector _input_layer_info; - std::vector _output_layer_info; - std::string _model_file_path; - -public: - FaceNetInfo(std::string model_file_path); - ~FaceNetInfo(); - - std::vector &GetInputLayerInfo() override; - std::vector &GetOutputLayerInfo() override; - std::string GetModelFilePath() override; -}; - -#endif \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/include/face_recognition.h b/mv_machine_learning/face_recognition/include/face_recognition.h index 0a4620ac..b0e70be4 100644 --- a/mv_machine_learning/face_recognition/include/face_recognition.h +++ b/mv_machine_learning/face_recognition/include/face_recognition.h @@ -50,37 +50,39 @@ enum { REGISTER = 0, INFERENCE, DELETE }; } // face_recognition -typedef struct { +struct face_recognition_register_input_s { + std::vector data; + std::string label; +}; + +struct mv_face_recognition_input_s { unsigned int mode; - std::unordered_map register_src; - mv_source_h inference_src; + std::vector register_src; + std::vector> inputs; std::vector labels; -} mv_face_recognition_input_s; +}; /** * @brief The face recognition result structure. * @details Contains face recognition result such as label, label index, raw data, * and raw data count. */ -typedef struct { +struct mv_face_recognition_result_s { unsigned int label_idx; /**< label index of label file. */ std::vector raw_data; /**< raw data to each label. 
*/ std::string label; /**< label string. */ -} mv_face_recognition_result_s; +}; -typedef struct { +struct FaceRecognitionConfig { mv_inference_target_device_e training_target_device_type; mv_inference_backend_type_e training_engine_backend_type; mv_inference_target_device_e inference_target_device_type; mv_inference_backend_type_e inference_engine_backend_type; - mv_inference_target_device_e backbone_target_device_type; - mv_inference_backend_type_e backbone_engine_backend_type; - std::string backbone_model_file_path; std::string internal_model_file_path; std::string label_file_path; std::string feature_vector_file_path; double decision_threshold; -} FaceRecognitionConfig; +}; class FaceRecognition { @@ -113,8 +115,8 @@ public: int Initialize(); void SetConfig(FaceRecognitionConfig &config); - int RegisterNewFace(mv_source_h img_src, std::string label_name); - int RecognizeFace(mv_source_h img_src); + int RegisterNewFace(std::vector &input_vec, std::string label_name); + int RecognizeFace(std::vector &input_vec); int DeleteLabel(std::string label_name); int GetLabel(const char **out_label); mv_face_recognition_result_s &GetResult(); diff --git a/mv_machine_learning/face_recognition/include/face_recognition_adapter.h b/mv_machine_learning/face_recognition/include/face_recognition_adapter.h index d77d938f..c63c3848 100644 --- a/mv_machine_learning/face_recognition/include/face_recognition_adapter.h +++ b/mv_machine_learning/face_recognition/include/face_recognition_adapter.h @@ -23,39 +23,6 @@ #include "itask.h" #include "face_recognition.h" -/** - * @brief Defines #MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH - * to set the backbone model file path. - * @details This model file is used to extract the feature vectors from a given face image data. 
- * - * @since_tizen 7.0 - * @see mv_engine_config_set_string_attribute() - * @see mv_engine_config_get_string_attribute() - */ -#define MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH "MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH" - -/** - * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH - * to set the path where the training relevant files are created. - * @details This path is used as a default location where the trained model, label and feature vector files are created. - * - * @since_tizen 7.0 - * @see mv_engine_config_set_string_attribute() - * @see mv_engine_config_get_string_attribute() - */ -#define MV_FACE_RECOGNITION_DEFAULT_PATH "MV_FACE_RECOGNITION_DEFAULT_PATH" - -/** - * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD - * to set the decision threshold file+. - * @details This file is used to determine face recognition result with a given face image data is true or false.. - * - * @since_tizen 7.0 - * @see mv_engine_config_set_string_attribute() - * @see mv_engine_config_get_string_attribute() - */ -#define MV_FACE_RECOGNITION_DECISION_THRESHOLD "MV_FACE_RECOGNITION_DECISION_THRESHOLD" - namespace mediavision { namespace machine_learning @@ -64,7 +31,7 @@ template class FaceRecognitionAdapter : public mediavisi { private: std::unique_ptr _face_recognition; - mv_face_recognition_input_s _source; + T _source {}; std::unique_ptr _config; public: diff --git a/mv_machine_learning/face_recognition/include/face_recognition_type.h b/mv_machine_learning/face_recognition/include/face_recognition_type.h new file mode 100644 index 00000000..b3167d9e --- /dev/null +++ b/mv_machine_learning/face_recognition/include/face_recognition_type.h @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FACE_RECOGNITION_TYPE_H__ +#define __FACE_RECOGNITION_TYPE_H__ + +#include + +/** + * @brief Defines #MV_FACENET_MODEL_FILE_PATH + * to set the backbone model file path. + * @details This model file is used to extract the feature vectors from a given face image data. + * + * @since_tizen 7.0 + * @see mv_engine_config_set_string_attribute() + * @see mv_engine_config_get_string_attribute() + */ +#define MV_FACENET_MODEL_FILE_PATH "FACENET_MODEL_FILE_PATH" + +/** + * @brief Defines #MV_FACENET_MODEL_META_FILE_PATH + * to set the backbone model meta file path. + * @details This model meta file is used to provide input and output tensor info of a given model file. + * + * @since_tizen 7.0 + * @see mv_engine_config_set_string_attribute() + * @see mv_engine_config_get_string_attribute() + */ +#define MV_FACENET_MODEL_META_FILE_PATH "FACENET_MODEL_META_FILE_PATH" + +#define MV_FACENET_OUTPUT_TENSOR_NAME "FACENET_OUTPUT_TENSOR_NAME" + +/** + * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH + * to set the path where the training relevant files are created. + * @details This path is used as a default location where the trained model, label and feature vector files are created. 
+ * + * @since_tizen 7.0 + * @see mv_engine_config_set_string_attribute() + * @see mv_engine_config_get_string_attribute() + */ +#define MV_FACE_RECOGNITION_DEFAULT_PATH "FACE_RECOGNITION_DEFAULT_PATH" + +#define MV_FACENET_BACKEND_TYPE "FACENET_MODEL_BACKEND_TYPE" + +#define MV_FACENET_TARGET_DEVICE_TYPE "FACENET_MODEL_TARGET_DEVICE_TYPE" + +#define FACE_RECOGNITION_META_FILE_NAME "face_recognition.json" + +/** + * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD + * to set the decision threshold file+. + * @details This file is used to determine face recognition result with a given face image data is true or false.. + * + * @since_tizen 7.0 + * @see mv_engine_config_set_string_attribute() + * @see mv_engine_config_get_string_attribute() + */ +#define MV_FACE_RECOGNITION_DECISION_THRESHOLD "FACE_RECOGNITION_DECISION_THRESHOLD" + +struct facenet_input_s { + std::vector inputs; +}; + +struct facenet_output_s { + std::vector> outputs; +}; + +#endif \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/include/facenet.h b/mv_machine_learning/face_recognition/include/facenet.h new file mode 100644 index 00000000..ec5cda45 --- /dev/null +++ b/mv_machine_learning/face_recognition/include/facenet.h @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __FACENET_H__ +#define __FACENET_H__ + +#include +#include +#include "mv_private.h" + +#include "EngineConfig.h" +#include "inference_engine_common_impl.h" +#include "Inference.h" +#include "facenet_parser.h" +#include "face_recognition_type.h" +#include "Preprocess.h" + +namespace mediavision +{ +namespace machine_learning +{ + +class Facenet +{ +protected: + std::unique_ptr _inference; + std::unique_ptr _config; + std::unique_ptr _parser; + facenet_output_s _result; + inference_engine_tensor_buffer *_outputTensorBuffer; + Preprocess _preprocess; + std::string _modelFilePath; + std::string _modelMetaFilePath; + std::string _facenetOutputTensorName; + int _backendType; + int _targetDeviceType; + +public: + Facenet(); + virtual ~Facenet() = default; + void parseMetaFile(); + void configure(); + void prepare(); + void preprocess(mv_source_h &mv_src); + void inference(mv_source_h source); + facenet_output_s &getResult(); +}; + +} // machine_learning +} // mediavision + +#endif \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/include/facenet_adapter.h b/mv_machine_learning/face_recognition/include/facenet_adapter.h new file mode 100644 index 00000000..2d5bd907 --- /dev/null +++ b/mv_machine_learning/face_recognition/include/facenet_adapter.h @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __FACENET_ADAPTER_H__ +#define __FACENET_ADAPTER_H__ + +#include + +#include "EngineConfig.h" +#include "itask.h" +#include "facenet.h" + +namespace mediavision +{ +namespace machine_learning +{ + +template class FacenetAdapter : public mediavision::common::ITask +{ +private: + std::unique_ptr _facenet; + T _source; + +public: + FacenetAdapter(); + ~FacenetAdapter(); + + void create(int type) override; + + void configure() override; + void prepare() override; + void setInput(T &t) override; + void perform() override; + V &getOutput() override; +}; + +} // machine_learning +} // mediavision + +#endif \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/include/facenet_parser.h b/mv_machine_learning/face_recognition/include/facenet_parser.h new file mode 100644 index 00000000..1f4e0eaf --- /dev/null +++ b/mv_machine_learning/face_recognition/include/facenet_parser.h @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __FACENET_PARSER_H__ +#define __FACENET_PARSER_H__ + +#include "MetaParser.h" +#include "PostprocessParser.h" + +namespace mediavision +{ +namespace machine_learning +{ +class FacenetParser : public MetaParser +{ +private: + PostprocessParser _postprocessParser; + +protected: + void parsePostprocess(std::shared_ptr meta_info, JsonObject *in_obj) override; + +public: + FacenetParser(); + ~FacenetParser(); +}; + +} +} + +#endif \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/meta/face_recognition.json b/mv_machine_learning/face_recognition/meta/face_recognition.json index 3d0bfd2e..1076f4c3 100644 --- a/mv_machine_learning/face_recognition/meta/face_recognition.json +++ b/mv_machine_learning/face_recognition/meta/face_recognition.json @@ -2,17 +2,37 @@ "attributes": [ { - "name" : "MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH", + "name" : "FACENET_MODEL_FILE_PATH", "type" : "string", "value" : "/home/owner/media/res/face_recognition/backbone/facenet.tflite" }, { - "name" : "MV_FACE_RECOGNITION_DEFAULT_PATH", + "name" : "FACENET_MODEL_META_FILE_PATH", + "type" : "string", + "value" : "/home/owner/media/res/face_recognition/backbone/facenet.json" + }, + { + "name" : "FACE_RECOGNITION_DEFAULT_PATH", "type" : "string", "value" : "/home/owner/media/res/face_recognition/training/" }, { - "name" : "MV_FACE_RECOGNITION_DECISION_THRESHOLD", + "name" : "FACENET_MODEL_BACKEND_TYPE", + "type" : "integer", + "value" : 1 + }, + { + "name" : "FACENET_MODEL_TARGET_DEVICE_TYPE", + "type" : "integer", + "value" : 1 + }, + { + "name" : "FACENET_OUTPUT_TENSOR_NAME", + "type" : "string", + "value" : "normalize/l2_normalize" + }, + { + "name" : "FACE_RECOGNITION_DECISION_THRESHOLD", "type" : "double", "value" : -0.85 } diff --git a/mv_machine_learning/face_recognition/src/face_net_info.cpp b/mv_machine_learning/face_recognition/src/face_net_info.cpp deleted file mode 100644 index 888e51bd..00000000 --- 
a/mv_machine_learning/face_recognition/src/face_net_info.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "face_net_info.h" - -using namespace std; - -FaceNetInfo::FaceNetInfo(string model_file_path) -{ - _model_file_path = model_file_path; - - const string input_layer_name = { "input_1" }; - const inference_engine_tensor_info input_tensor_info = { - { 160, 160, 3, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(1 * 3 * 160 * 160) - }; - - model_layer_info input_info = { input_layer_name, input_tensor_info }; - _input_layer_info.push_back(input_info); - - const string output_layer_name = { "normalize/l2_normalize" }; - const inference_engine_tensor_info output_tensor_info = { - { 512, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(1 * 512) - }; - - model_layer_info output_info = { output_layer_name, output_tensor_info }; - _output_layer_info.push_back(output_info); -} - -FaceNetInfo::~FaceNetInfo() -{ - _input_layer_info.clear(); - _output_layer_info.clear(); -} - -string FaceNetInfo::GetModelFilePath() -{ - return _model_file_path; -} - -vector &FaceNetInfo::GetInputLayerInfo() -{ - return _input_layer_info; -} - -vector &FaceNetInfo::GetOutputLayerInfo() -{ - return _output_layer_info; -} \ No newline at end of file diff --git 
a/mv_machine_learning/face_recognition/src/face_recognition.cpp b/mv_machine_learning/face_recognition/src/face_recognition.cpp index 212a5e08..5b3afdbd 100644 --- a/mv_machine_learning/face_recognition/src/face_recognition.cpp +++ b/mv_machine_learning/face_recognition/src/face_recognition.cpp @@ -32,7 +32,6 @@ #include "face_recognition.h" #include "nntrainer_fvm.h" #include "nntrainer_dsm.h" -#include "face_net_info.h" #include "file_util.h" using namespace std; @@ -188,54 +187,12 @@ int FaceRecognition::GetVecFromMvSource(mv_source_h img_src, std::vector int FaceRecognition::Initialize() { - _backbone_model_info = make_unique(_config.backbone_model_file_path); - - if (_backbone_model_info->GetInputLayerInfo().empty() || _backbone_model_info->GetInputLayerInfo().size() > 1) { - LOGE("Invalid input layer size - input layer size should be 1."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - if (_backbone_model_info->GetOutputLayerInfo().empty() || _backbone_model_info->GetOutputLayerInfo().size() > 1) { - LOGE("Invalid output layer size - output layer size should be 1."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - vector input_layer_names, output_layer_names; - - for (auto &input : _backbone_model_info->GetInputLayerInfo()) - input_layer_names.push_back(input.layer_name); - - for (auto &output : _backbone_model_info->GetOutputLayerInfo()) - output_layer_names.push_back(output.layer_name); - - // Initialize inference engine object for backbone model. - _backbone = make_unique(); - - int ret = _backbone->Bind(_config.backbone_engine_backend_type, _config.backbone_target_device_type); - if (ret != MEDIA_VISION_ERROR_NONE) - return ret; - - // Tensor order is NCHW. 
- vector &input_layer_info = GetBackboneInputLayerInfo(); - size_t width = input_layer_info[0].tensor_info.shape[0]; - size_t height = input_layer_info[0].tensor_info.shape[1]; - size_t ch = input_layer_info[0].tensor_info.shape[2]; - vector output_tensor_info; - - _backbone->ConfigureInputInfo(width, height, 1, ch, 127.5f, 127.5f, MV_INFERENCE_DATA_FLOAT32, input_layer_names); - _backbone->ConfigureOutputInfo(output_layer_names, output_tensor_info); - _backbone->ConfigureModelFiles("", _backbone_model_info->GetModelFilePath(), ""); - - ret = _backbone->Load(); - if (ret != MEDIA_VISION_ERROR_NONE) - return ret; - _training_model = make_unique(_config.training_engine_backend_type, _config.training_target_device_type, _config.internal_model_file_path); _internal = make_unique(); - ret = _internal->Bind(_config.inference_engine_backend_type, _config.inference_target_device_type); + int ret = _internal->Bind(_config.inference_engine_backend_type, _config.inference_target_device_type); if (ret != MEDIA_VISION_ERROR_NONE) return ret; @@ -261,10 +218,8 @@ void FaceRecognition::ImportLabel() } } -int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name) +int FaceRecognition::RegisterNewFace(std::vector &input_vec, string label_name) { - vector &output_layer_info = _backbone_model_info->GetOutputLayerInfo(); - if (_status < INITIALIZED) { LOGE("Initialization not ready yet. (%u)", _status); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -283,30 +238,6 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name) return MEDIA_VISION_ERROR_INVALID_OPERATION; } - std::vector backbone_sources { img_src }; - std::vector rects; - - int ret = _backbone->Run(backbone_sources, rects); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("fail to inference backbone model."); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // 2. Get feature vector from a given vec through inference engine. - // Ps. output layer size should be 1. 
- TensorBuffer tensorBuffer = _backbone->GetOutputTensorBuffer(); - inference_engine_tensor_buffer *backbone_output_buffer = - tensorBuffer.getTensorBuffer(output_layer_info[0].layer_name); - if (!backbone_output_buffer) { - LOGE("fail to get output tensor buffer."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - vector feature_vec; - auto buffer = static_cast(backbone_output_buffer->buffer); - - copy(buffer, buffer + backbone_output_buffer->size / sizeof(float), back_inserter(feature_vec)); - // Get label index and count. unsigned int label_idx = _label_manager->GetLabelIndex(label_name); unsigned int label_cnt = _label_manager->GetMaxLabel(); @@ -324,7 +255,7 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name) } // Add new feature vectors. - data_set->AddDataSet(feature_vec, label_idx, label_cnt); + data_set->AddDataSet(input_vec, label_idx, label_cnt); _training_model->ApplyDataSet(data_set); _training_model->Compile(); @@ -388,7 +319,7 @@ int FaceRecognition::GetAnswer() return MEDIA_VISION_ERROR_NONE; } -int FaceRecognition::RecognizeFace(mv_source_h img_src) +int FaceRecognition::RecognizeFace(std::vector &input_vec) { if (_status < INITIALIZED) { LOGE("Initialization not ready yet.(%u)", _status); @@ -417,38 +348,6 @@ int FaceRecognition::RecognizeFace(mv_source_h img_src) // Import label data from a label file. 
ImportLabel(); - if (_backbone_model_info->GetInputLayerInfo().empty() || _backbone_model_info->GetInputLayerInfo().size() > 1) { - LOGE("Invalid input layer size - input layer size should be 1."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - if (_backbone_model_info->GetOutputLayerInfo().empty() || - _backbone_model_info->GetOutputLayerInfo().size() > 1) { - LOGE("Invalid output layer size - output layer size should be 1."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - std::vector backbone_sources { img_src }; - std::vector backbone_rects; - - // Do inference to backbone model to get feature vector. - int ret = _backbone->Run(backbone_sources, backbone_rects); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("fail to inference backbone model."); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // Get output layer info for facenet model. - vector &output_layer_info = _backbone_model_info->GetOutputLayerInfo(); - // Get output tensor buffer to the output layer. - TensorBuffer tensorBuffer = _backbone->GetOutputTensorBuffer(); - inference_engine_tensor_buffer *backbone_output_buffer = - tensorBuffer.getTensorBuffer(output_layer_info[0].layer_name); - if (!backbone_output_buffer) { - LOGE("fail to get backbone output tensor buffer."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - // Tensor order is NCHW. size_t width = input_tensor_info[0].shape[0]; size_t height = input_tensor_info[0].shape[1]; @@ -462,23 +361,23 @@ int FaceRecognition::RecognizeFace(mv_source_h img_src) _internal->ConfigureModelFiles("", _config.internal_model_file_path, ""); // Load the trained internal model. - ret = _internal->Load(); + int ret = _internal->Load(); if (ret != INFERENCE_ENGINE_ERROR_NONE) { LOGE("Fail to Load."); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - std::vector raw_buffers { static_cast(backbone_output_buffer) }; + std::vector> input_tensors = { input_vec }; // Do inference to the internal model. 
- ret = _internal->Run(raw_buffers); + ret = _internal->Run(input_tensors); if (ret != INFERENCE_ENGINE_ERROR_NONE) { LOGE("fail to inference internal model."); return MEDIA_VISION_ERROR_INVALID_OPERATION; } // output layer size should be 1. - tensorBuffer = _internal->GetOutputTensorBuffer(); + TensorBuffer tensorBuffer = _internal->GetOutputTensorBuffer(); inference_engine_tensor_buffer *internal_output_buffer = tensorBuffer.getTensorBuffer(output_layers[0]); if (!internal_output_buffer) { LOGE("fail to get internal output tensor buffer."); diff --git a/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp b/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp index 79619011..30a847a4 100644 --- a/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp +++ b/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp @@ -16,8 +16,7 @@ #include "machine_learning_exception.h" #include "face_recognition_adapter.h" - -#define FACE_RECOGNITION_META_FILE_NAME "face_recognition.json" +#include "face_recognition_type.h" using namespace std; using namespace MediaVision::Common; @@ -44,16 +43,10 @@ template void FaceRecognitionAdapter::create(int t template void FaceRecognitionAdapter::configure() { _config = make_unique(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME)); - string backboneModelFilePath; - int ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH), &backboneModelFilePath); - if (ret != MEDIA_VISION_ERROR_NONE) - throw InvalidParameter("Failed to get an attribute"); - - LOGD("Backbone model file path : %s", backboneModelFilePath.c_str()); string defaultPath; - ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_DEFAULT_PATH), &defaultPath); + int ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_DEFAULT_PATH), &defaultPath); if (ret != MEDIA_VISION_ERROR_NONE) throw InvalidOperation("Fail to get default path."); @@ -69,9 +62,6 @@ 
template void FaceRecognitionAdapter::configure() MV_INFERENCE_BACKEND_NNTRAINER, // not used and default type is used. See TrainingModel() MV_INFERENCE_TARGET_DEVICE_CPU, MV_INFERENCE_BACKEND_NNTRAINER, - MV_INFERENCE_TARGET_DEVICE_CPU, - MV_INFERENCE_BACKEND_TFLITE, - backboneModelFilePath, string(defaultPath) + "model_and_weights.ini", string(defaultPath) + "labels.dat", string(defaultPath) + "feature_vector_file.dat", @@ -97,7 +87,7 @@ template void FaceRecognitionAdapter::perform() { if (_source.mode == mode::REGISTER) { for (auto &s : _source.register_src) { - int ret = _face_recognition->RegisterNewFace(s.first, s.second); + int ret = _face_recognition->RegisterNewFace(s.data, s.label); if (ret != MEDIA_VISION_ERROR_NONE) throw InvalidOperation("Fail to register new face."); } @@ -106,7 +96,8 @@ template void FaceRecognitionAdapter::perform() } if (_source.mode == mode::INFERENCE) { - int ret = _face_recognition->RecognizeFace(_source.inference_src); + // _source.inputs.size should be 1. + int ret = _face_recognition->RecognizeFace(_source.inputs[0]); if (ret == MEDIA_VISION_ERROR_NO_DATA) throw NoData("Label not found."); diff --git a/mv_machine_learning/face_recognition/src/facenet.cpp b/mv_machine_learning/face_recognition/src/facenet.cpp new file mode 100644 index 00000000..93c6405c --- /dev/null +++ b/mv_machine_learning/face_recognition/src/facenet.cpp @@ -0,0 +1,160 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include "machine_learning_exception.h" +#include "facenet.h" +#include "face_recognition_type.h" + +using namespace std; +using namespace mediavision::inference; +using namespace MediaVision::Common; +using namespace mediavision::machine_learning::exception; + +namespace mediavision +{ +namespace machine_learning +{ +Facenet::Facenet() : _backendType(), _targetDeviceType() +{ + _inference = make_unique(); + _parser = make_unique(); +} + +static bool IsJsonFile(const string &fileName) +{ + return (!fileName.substr(fileName.find_last_of(".") + 1).compare("json")); +} + +void Facenet::parseMetaFile() +{ + _config = make_unique(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME)); + + int ret = _config->getIntegerAttribute(string(MV_FACENET_BACKEND_TYPE), &_backendType); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to get backend engine type."); + + ret = _config->getIntegerAttribute(string(MV_FACENET_TARGET_DEVICE_TYPE), &_targetDeviceType); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to get target device type."); + + ret = _config->getStringAttribute(MV_FACENET_MODEL_FILE_PATH, &_modelFilePath); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to get model file path."); + + ret = _config->getStringAttribute(MV_FACENET_MODEL_META_FILE_PATH, &_modelMetaFilePath); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to get model meta file path."); + + ret = _config->getStringAttribute(MV_FACENET_OUTPUT_TENSOR_NAME, &_facenetOutputTensorName); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to get facenet output tensor name."); + + if (_modelMetaFilePath.empty()) + throw InvalidOperation("Model meta file doesn't exist."); + + if (!IsJsonFile(_modelMetaFilePath)) + throw InvalidOperation("Model meta 
file should be json."); + + _parser->load(_modelMetaFilePath); +} + +void Facenet::configure() +{ + int ret = _inference->Bind(_backendType, _targetDeviceType); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to bind a backend engine."); +} + +void Facenet::prepare() +{ + int ret = _inference->configureInputMetaInfo(_parser->getInputMetaMap()); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to configure input tensor info from meta file."); + + ret = _inference->configureOutputMetaInfo(_parser->getOutputMetaMap()); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to configure output tensor info from meta file."); + + _inference->ConfigureModelFiles("", _modelFilePath, ""); + + // Request to load model files to a backend engine. + ret = _inference->Load(); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to load model files."); +} +void Facenet::preprocess(mv_source_h &mv_src) +{ + LOGI("ENTER"); + + TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer(); + IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer(); + vector mv_srcs = { mv_src }; + + _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer); + + LOGI("LEAVE"); +} + +void Facenet::inference(mv_source_h source) +{ + LOGI("ENTER"); + + vector sources; + + sources.push_back(source); + + int ret = _inference->Run(); + if (ret != MEDIA_VISION_ERROR_NONE) + throw InvalidOperation("Fail to run inference"); + + LOGI("LEAVE"); +} + +facenet_output_s &Facenet::getResult() +{ + TensorBuffer &tensor_buffer_obj = _inference->GetOutputTensorBuffer(); + IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer(); + + // Make sure to clear _result.outputs vectors because if not clear then other output_vector will be pushed to _result.outputs + // and it results in sending wrong output vector to face recognition framework. 
+ _result.outputs.clear(); + + for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++) { + if (it->first.compare(_facenetOutputTensorName) == 0) { + _outputTensorBuffer = tensor_buffer_obj.getTensorBuffer(it->first); + if (!_outputTensorBuffer) + throw InvalidOperation("Fail to get output tensor buffer."); + + vector output_vector; + float *buffer = reinterpret_cast(_outputTensorBuffer->buffer); + + copy(&buffer[0], &buffer[_outputTensorBuffer->size / sizeof(float)], back_inserter(output_vector)); + _result.outputs.push_back(output_vector); + + return _result; + } + } + + throw InvalidOperation("No output tensor."); +} + +} +} \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/src/facenet_adapter.cpp b/mv_machine_learning/face_recognition/src/facenet_adapter.cpp new file mode 100644 index 00000000..a85c7bcf --- /dev/null +++ b/mv_machine_learning/face_recognition/src/facenet_adapter.cpp @@ -0,0 +1,83 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "machine_learning_exception.h" +#include "facenet_adapter.h" + +using namespace std; +using namespace MediaVision::Common; +using namespace mediavision::machine_learning; +using namespace mediavision::machine_learning::exception; + +namespace mediavision +{ +namespace machine_learning +{ +template FacenetAdapter::FacenetAdapter() : _source() +{ + _facenet = make_unique(); +} + +template FacenetAdapter::~FacenetAdapter() +{} + +template void FacenetAdapter::create(int type) +{ + throw InvalidOperation("Not support yet."); +} + +template void FacenetAdapter::configure() +{ + try { + _facenet->parseMetaFile(); + _facenet->configure(); + } catch (const BaseException &e) { + throw e; + } +} + +template void FacenetAdapter::prepare() +{ + try { + _facenet->prepare(); + } catch (const BaseException &e) { + throw e; + } +} + +template void FacenetAdapter::setInput(T &t) +{ + _source = t; +} + +template void FacenetAdapter::perform() +{ + try { + _facenet->preprocess(_source.inputs[0]); + _facenet->inference(_source.inputs[0]); + } catch (const BaseException &e) { + throw e; + } +} + +template V &FacenetAdapter::getOutput() +{ + return _facenet->getResult(); +} + +template class FacenetAdapter; +} +} \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/src/facenet_parser.cpp b/mv_machine_learning/face_recognition/src/facenet_parser.cpp new file mode 100644 index 00000000..b04d5616 --- /dev/null +++ b/mv_machine_learning/face_recognition/src/facenet_parser.cpp @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "machine_learning_exception.h" +#include "facenet_parser.h" + +using namespace std; +using namespace mediavision::machine_learning::exception; + +namespace mediavision +{ +namespace machine_learning +{ +FacenetParser::FacenetParser() +{ + LOGI("ENTER"); + LOGI("LEAVE"); +} + +FacenetParser::~FacenetParser() +{} + +void FacenetParser::parsePostprocess(shared_ptr meta_info, JsonObject *in_obj) +{ + LOGI("ENTER"); + + LOGI("tensor name : %s", meta_info->name.c_str()); + + if (json_object_has_member(in_obj, "box")) + _postprocessParser.parseBox(meta_info, in_obj); + + LOGI("LEAVE"); +} + +} +} \ No newline at end of file diff --git a/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp b/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp index 5d305f30..1b5042b4 100644 --- a/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp +++ b/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp @@ -19,6 +19,7 @@ #include #include "face_recognition_adapter.h" +#include "facenet_adapter.h" #include "mv_face_recognition_open.h" #include "machine_learning_exception.h" #include "context.h" @@ -29,6 +30,7 @@ using namespace mediavision::machine_learning; using namespace mediavision::machine_learning::face_recognition; using namespace mediavision::machine_learning::exception; using FaceRecognitionTask = ITask; +using FacenetTask = ITask; int mv_face_recognition_create_open(mv_face_recognition_h *handle) { @@ -43,9 +45,18 @@ int mv_face_recognition_create_open(mv_face_recognition_h 
*handle) return MEDIA_VISION_ERROR_OUT_OF_MEMORY; } - FaceRecognitionTask *task = new (nothrow) + FaceRecognitionTask *face_recognition_task = new (nothrow) FaceRecognitionAdapter(); - if (!task) { + if (!face_recognition_task) { + delete context; + LOGE("Fail to allocate a task."); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + FacenetTask *facenet_task = new (nothrow) + FacenetAdapter(); + if (!facenet_task) { + delete face_recognition_task; delete context; LOGE("Fail to allocate a task."); return MEDIA_VISION_ERROR_OUT_OF_MEMORY; @@ -53,8 +64,19 @@ int mv_face_recognition_create_open(mv_face_recognition_h *handle) pair::iterator, bool> result; - result = context->__tasks.insert(pair("face_recognition", task)); + result = context->__tasks.insert(pair("face_recognition", face_recognition_task)); if (!result.second) { + delete facenet_task; + delete face_recognition_task; + delete context; + LOGE("Fail to register a new task. Same task already exists."); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + result = context->__tasks.insert(pair("facenet", facenet_task)); + if (!result.second) { + delete facenet_task; + delete face_recognition_task; delete context; LOGE("Fail to register a new task. 
Same task already exists."); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -78,8 +100,15 @@ int mv_face_recognition_destroy_open(mv_face_recognition_h handle) map::iterator iter; for (iter = context->__tasks.begin(); iter != context->__tasks.end(); ++iter) { - auto task = static_cast(iter->second); - delete task; + if (iter->first.compare("face_recognition") == 0) { + auto face_recognition_task = static_cast(iter->second); + delete face_recognition_task; + } + + if (iter->first.compare("facenet") == 0) { + auto facenet_task = static_cast(iter->second); + delete facenet_task; + } } delete context; @@ -100,10 +129,13 @@ int mv_face_recognition_prepare_open(mv_face_recognition_h handle) try { Context *context = static_cast(handle); - auto task = static_cast(context->__tasks["face_recognition"]); + auto face_recognition_task = static_cast(context->__tasks["face_recognition"]); + auto facenet_task = static_cast(context->__tasks["facenet"]); - task->configure(); - task->prepare(); + face_recognition_task->configure(); + facenet_task->configure(); + face_recognition_task->prepare(); + facenet_task->prepare(); } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); @@ -125,14 +157,22 @@ int mv_face_recognition_register_open(mv_face_recognition_h handle, mv_source_h try { Context *context = static_cast(handle); - auto task = static_cast(context->__tasks["face_recognition"]); + auto face_recognition_task = static_cast(context->__tasks["face_recognition"]); + auto facenet_task = static_cast(context->__tasks["facenet"]); + + facenet_input_s facenet_input = { { source } }; - mv_face_recognition_input_s input = { mode::REGISTER }; + facenet_task->setInput(facenet_input); + facenet_task->perform(); - input.register_src.clear(); - input.register_src.insert(make_pair(source, string(label))); - task->setInput(input); - task->perform(); + facenet_output_s &facenet_output = facenet_task->getOutput(); + mv_face_recognition_input_s face_recognition_input = { 
.mode = mode::REGISTER }; + face_recognition_register_input_s facenet_to_face_reg_input = { facenet_output.outputs[0], string(label) }; + + face_recognition_input.register_src.clear(); + face_recognition_input.register_src.push_back(facenet_to_face_reg_input); + face_recognition_task->setInput(face_recognition_input); + face_recognition_task->perform(); } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); @@ -154,14 +194,14 @@ int mv_face_recognition_unregister_open(mv_face_recognition_h handle, const char try { Context *context = static_cast(handle); - auto task = static_cast(context->__tasks["face_recognition"]); + auto face_recognition_task = static_cast(context->__tasks["face_recognition"]); mv_face_recognition_input_s input = { mode::DELETE }; input.labels.clear(); input.labels.push_back(string(label)); - task->setInput(input); - task->perform(); + face_recognition_task->setInput(input); + face_recognition_task->perform(); } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); @@ -183,13 +223,20 @@ int mv_face_recognition_inference_open(mv_face_recognition_h handle, mv_source_h try { Context *context = static_cast(handle); - auto task = static_cast(context->__tasks["face_recognition"]); + auto face_recognition_task = static_cast(context->__tasks["face_recognition"]); + auto facenet_task = static_cast(context->__tasks["facenet"]); + + facenet_input_s facenet_input = { { source } }; + + facenet_task->setInput(facenet_input); + facenet_task->perform(); + facenet_output_s &facenet_output = facenet_task->getOutput(); - mv_face_recognition_input_s input = { mode::INFERENCE }; + mv_face_recognition_input_s face_recognition_input = { mode::INFERENCE }; - input.inference_src = source; - task->setInput(input); - task->perform(); + face_recognition_input.inputs = facenet_output.outputs; + face_recognition_task->setInput(face_recognition_input); + face_recognition_task->perform(); } catch (const BaseException &e) { 
LOGE("%s", e.what()); return e.getError(); @@ -211,9 +258,9 @@ int mv_face_recognition_get_label_open(mv_face_recognition_h handle, const char try { Context *context = static_cast(handle); - auto task = static_cast(context->__tasks["face_recognition"]); + auto face_recognition_task = static_cast(context->__tasks["face_recognition"]); - *out_label = task->getOutput().label.c_str(); + *out_label = face_recognition_task->getOutput().label.c_str(); } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); diff --git a/mv_machine_learning/inference/include/Inference.h b/mv_machine_learning/inference/include/Inference.h index 4cfc73e5..b8093d61 100644 --- a/mv_machine_learning/inference/include/Inference.h +++ b/mv_machine_learning/inference/include/Inference.h @@ -258,17 +258,7 @@ public: */ int Run(std::vector &mvSources, std::vector &rects); - /** - * @brief Runs inference with a region of a given image - * @details Use this function to run forward pass with the given image. - * The given image is preprocessed and the region of the image is - * thrown to neural network. Then, the output tensor is returned. - * If roi is NULL, then full source will be analyzed. 
- * - * @since_tizen 7.0 - * @return @c true on success, otherwise a negative error value - */ - int Run(std::vector &buffer_objs); + int Run(std::vector> &input_tensors); int Run(); diff --git a/mv_machine_learning/inference/src/Inference.cpp b/mv_machine_learning/inference/src/Inference.cpp index 320c1160..5f5c40c3 100644 --- a/mv_machine_learning/inference/src/Inference.cpp +++ b/mv_machine_learning/inference/src/Inference.cpp @@ -1020,7 +1020,7 @@ int Inference::Run(std::vector &mvSources, std::vector &buffer_objs) +int Inference::Run(std::vector> &input_tensors) { int ret = INFERENCE_ENGINE_ERROR_NONE; @@ -1029,18 +1029,18 @@ int Inference::Run(std::vector &buffer_objs) return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (buffer_objs.empty()) { + if (input_tensors.empty()) { LOGE("cvSources should contain only one cv source."); return MEDIA_VISION_ERROR_INVALID_PARAMETER; } // We are able to request Only one input data for the inference as of now. - if (buffer_objs.size() > 1) { + if (input_tensors.size() > 1) { LOGE("It allows only one source for the inference."); return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - if (mInputTensorBuffers.getIETensorBuffer().size() != buffer_objs.size()) { + if (mInputTensorBuffers.getIETensorBuffer().size() != input_tensors.size()) { LOGE("Raw source count is not invalid."); return MEDIA_VISION_ERROR_INVALID_PARAMETER; } @@ -1049,17 +1049,17 @@ int Inference::Run(std::vector &buffer_objs) for (auto &buffer : mInputTensorBuffers.getIETensorBuffer()) { inference_engine_tensor_buffer &tensor_buffer = buffer.second; - inference_engine_tensor_buffer *buffer_obj = - static_cast(buffer_objs[buffer_idx]); + std::vector &input_tensor = input_tensors[buffer_idx]; + const size_t input_tensor_size = input_tensor.size() * sizeof(float); - if (tensor_buffer.size != buffer_obj->size) { - LOGE("Raw buffer size is invalid."); + if (tensor_buffer.size != input_tensor_size) { + LOGE("Raw buffer size is invalid. (%zu vs %zu)", tensor_buffer.size,
input_tensor_size); return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - LOGI("A number of tensor bytes : %zu", buffer_obj->size); + LOGI("A number of tensor bytes : %zu", input_tensor_size); - memcpy(tensor_buffer.buffer, buffer_obj->buffer, tensor_buffer.size); + memcpy(tensor_buffer.buffer, input_tensor.data(), input_tensor_size); buffer_idx++; }