mv_machine_learning: use raw data type instead of mv_source_h
author Inki Dae <inki.dae@samsung.com>
Wed, 3 May 2023 05:07:30 +0000 (14:07 +0900)
committer Kwanghoon Son <k.son@samsung.com>
Wed, 28 Jun 2023 09:51:32 +0000 (18:51 +0900)
[Issue type]: code refactoring

Use a raw data type instead of mv_source_h. This patch makes the Inference
class take a raw data type instead of mv_source_h as its input data when
performing a requested inference. mv_source_h is a wrapper around a raw
buffer containing image data, so there is no need to keep using mv_source_h
after the preprocessing step.
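
To illustrate, a minimal caller-side sketch of the resulting flow, modeled
on the adapter changes in the diff below (the "task" object here is a
hypothetical stand-in for any of the refactored task classes):

    // Dispatch on the model's input data type, then let perform<T>() hand
    // raw tensor data (not an mv_source_h) to the templated run().
    std::shared_ptr<MetaInfo> metaInfo = task->getInputMetaInfo();

    if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
        task->perform<unsigned char>(mv_src, metaInfo);
    else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
        task->perform<float>(mv_src, metaInfo);
    else
        throw InvalidOperation("Invalid model data type.");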

Until now, the Inference class did the preprocessing work before performing
an inference on a given input image. However, preprocessing is a step that
can be shared by the other core modules of MediaVision. Therefore, this patch
separates the preprocessing step from the Inference class by introducing a
new run member function, which takes raw tensor data.
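
In practice the separation looks like the simplified sketch below, which
mirrors the perform<T>() member function added to each task class in this
patch (error handling condensed):

    // Preprocessing consumes the mv_source_h and produces raw tensor data;
    // from this point on, inference only sees the raw buffers.
    std::vector<T> inputVector;
    _preprocess.run<T>(mv_src, metaInfo, inputVector);

    std::vector<std::vector<T> > inputVectors = { inputVector };
    if (_inference->run<T>(inputVectors) != MEDIA_VISION_ERROR_NONE)
        throw InvalidOperation("Fail to run inference");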

With this patch, the preliminary work for supporting the async API is complete.

Change-Id: Ieddb3945e564645faf2abd1d93d936012e6f0d86
Signed-off-by: Inki Dae <inki.dae@samsung.com>
23 files changed:
mv_machine_learning/face_recognition/include/facenet.h
mv_machine_learning/face_recognition/src/face_recognition.cpp
mv_machine_learning/face_recognition/src/facenet.cpp
mv_machine_learning/face_recognition/src/facenet_adapter.cpp
mv_machine_learning/image_classification/include/image_classification.h
mv_machine_learning/image_classification/src/image_classification.cpp
mv_machine_learning/image_classification/src/image_classification_adapter.cpp
mv_machine_learning/inference/include/Inference.h
mv_machine_learning/inference/src/Inference.cpp
mv_machine_learning/landmark_detection/include/landmark_detection.h
mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp
mv_machine_learning/landmark_detection/src/landmark_detection.cpp
mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp
mv_machine_learning/meta/include/Preprocess.h
mv_machine_learning/meta/src/MetaParser.cpp
mv_machine_learning/meta/src/Preprocess.cpp
mv_machine_learning/object_detection/include/object_detection.h
mv_machine_learning/object_detection/src/face_detection_adapter.cpp
mv_machine_learning/object_detection/src/object_detection.cpp
mv_machine_learning/object_detection/src/object_detection_adapter.cpp
mv_machine_learning/object_detection_3d/include/object_detection_3d.h
mv_machine_learning/object_detection_3d/src/object_detection_3d.cpp
mv_machine_learning/object_detection_3d/src/object_detection_3d_adapter.cpp

index 0fd375b..be0ce47 100644 (file)
@@ -47,14 +47,18 @@ protected:
        int _backendType {};
        int _targetDeviceType {};
 
+       template<typename T>
+       void preprocess(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
+       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
+
 public:
        Facenet();
        virtual ~Facenet() = default;
        void parseMetaFile();
+       std::shared_ptr<MetaInfo> getInputMetaInfo();
        void configure();
        void prepare();
-       void preprocess(mv_source_h &mv_src);
-       void inference(mv_source_h source);
+       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
        FacenetOutput &result();
 };
 
index d7c1c5e..645b517 100644 (file)
@@ -303,7 +303,7 @@ int FaceRecognition::recognizeFace(std::vector<float> &input_vec)
                std::vector<std::vector<float> > input_tensors = { input_vec };
 
                // Do inference to the internal model.
-               ret = _internal->run(input_tensors);
+               ret = _internal->run<float>(input_tensors);
                if (ret != INFERENCE_ENGINE_ERROR_NONE) {
                        LOGE("fail to inference internal model.");
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
index 28b0b46..7551b4c 100644 (file)
@@ -111,30 +111,53 @@ void Facenet::prepare()
                throw InvalidOperation("Fail to load model files.");
 }
 
-void Facenet::preprocess(mv_source_h &mv_src)
+shared_ptr<MetaInfo> Facenet::getInputMetaInfo()
 {
-       LOGI("ENTER");
+       TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
+       IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();
+
+       // TODO. consider using multiple tensors later.
+       if (tensor_info_map.size() != 1)
+               throw InvalidOperation("Invalid input tensor count.");
+
+       auto tensor_buffer_iter = tensor_info_map.begin();
+
+       // Get the meta information corresponding to a given input tensor name.
+       return _parser->getInputMetaMap()[tensor_buffer_iter->first];
+}
 
-       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-       vector<mv_source_h> mv_srcs = { mv_src };
+template<typename T>
+void Facenet::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
+{
+       LOGI("ENTER");
 
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+       _preprocess.run<T>(mv_src, metaInfo, inputVector);
 
        LOGI("LEAVE");
 }
 
-void Facenet::inference(mv_source_h source)
+template<typename T> void Facenet::inference(vector<vector<T> > &inputVectors)
 {
        LOGI("ENTER");
 
-       int ret = _inference->run();
+       int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");
 
        LOGI("LEAVE");
 }
 
+template<typename T> void Facenet::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
+{
+       vector<T> inputVector;
+
+       preprocess<T>(mv_src, metaInfo, inputVector);
+
+       vector<vector<T> > inputVectors = { inputVector };
+
+       inference<T>(inputVectors);
+}
+
 FacenetOutput &Facenet::result()
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
@@ -154,5 +177,15 @@ FacenetOutput &Facenet::result()
        return _result;
 }
 
+template void Facenet::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                vector<float> &inputVector);
+template void Facenet::inference<float>(vector<vector<float> > &inputVectors);
+template void Facenet::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
+template void Facenet::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                vector<unsigned char> &inputVector);
+template void Facenet::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
+template void Facenet::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
 }
 }
\ No newline at end of file
index bf0db2d..cef18dc 100644 (file)
@@ -80,8 +80,13 @@ template<typename T, typename V> void FacenetAdapter<T, V>::setInput(T &t)
 
 template<typename T, typename V> void FacenetAdapter<T, V>::perform()
 {
-       _facenet->preprocess(_source.inputs[0]);
-       _facenet->inference(_source.inputs[0]);
+       shared_ptr<MetaInfo> metaInfo = _facenet->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _facenet->perform<unsigned char>(_source.inputs[0], metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _facenet->perform<float>(_source.inputs[0], metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &FacenetAdapter<T, V>::getOutput()
index aa8f245..1d9c9da 100644 (file)
@@ -55,6 +55,9 @@ protected:
 
        void getOutputNames(std::vector<std::string> &names);
        void getOutpuTensor(std::string &target_name, std::vector<float> &tensor);
+       template<typename T>
+       void preprocess(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
+       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
 
 public:
        ImageClassification();
@@ -66,10 +69,10 @@ public:
        void getEngineType(unsigned int engine_index, char **engine_type);
        void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
        void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
+       std::shared_ptr<MetaInfo> getInputMetaInfo();
        void configure();
        void prepare();
-       void preprocess(mv_source_h &mv_src);
-       void inference(mv_source_h source);
+       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
        virtual ImageClassificationResult &result() = 0;
 };
 
index 16c9969..3638fcd 100644 (file)
@@ -258,34 +258,53 @@ void ImageClassification::prepare()
                throw InvalidOperation("Fail to load model files.");
 }
 
-void ImageClassification::preprocess(mv_source_h &mv_src)
+shared_ptr<MetaInfo> ImageClassification::getInputMetaInfo()
 {
-       LOGI("ENTER");
+       TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
+       IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();
 
-       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-       vector<mv_source_h> mv_srcs = { mv_src };
+       // TODO. consider using multiple tensors later.
+       if (tensor_info_map.size() != 1)
+               throw InvalidOperation("Invalid input tensor count.");
 
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+       auto tensor_buffer_iter = tensor_info_map.begin();
 
-       LOGI("LEAVE");
+       // Get the meta information corresponding to a given input tensor name.
+       return _parser->getInputMetaMap()[tensor_buffer_iter->first];
 }
 
-void ImageClassification::inference(mv_source_h source)
+template<typename T>
+void ImageClassification::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
 {
        LOGI("ENTER");
 
-       vector<mv_source_h> sources;
+       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+
+       LOGI("LEAVE");
+}
 
-       sources.push_back(source);
+template<typename T> void ImageClassification::inference(vector<vector<T> > &inputVectors)
+{
+       LOGI("ENTER");
 
-       int ret = _inference->run();
+       int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");
 
        LOGI("LEAVE");
 }
 
+template<typename T> void ImageClassification::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
+{
+       vector<T> inputVector;
+
+       preprocess<T>(mv_src, metaInfo, inputVector);
+
+       vector<vector<T> > inputVectors = { inputVector };
+
+       inference<T>(inputVectors);
+}
+
 void ImageClassification::getOutputNames(vector<string> &names)
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
@@ -312,5 +331,15 @@ void ImageClassification::getOutpuTensor(string &target_name, vector<float> &ten
        LOGI("LEAVE");
 }
 
+template void ImageClassification::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                        vector<float> &inputVector);
+template void ImageClassification::inference<float>(vector<vector<float> > &inputVectors);
+template void ImageClassification::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
+template void ImageClassification::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                                        vector<unsigned char> &inputVector);
+template void ImageClassification::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
+template void ImageClassification::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
 }
 }
\ No newline at end of file
index 1b474d7..a72c65e 100644 (file)
@@ -98,8 +98,13 @@ template<typename T, typename V> void ImageClassificationAdapter<T, V>::setInput
 
 template<typename T, typename V> void ImageClassificationAdapter<T, V>::perform()
 {
-       _image_classification->preprocess(_source.inference_src);
-       _image_classification->inference(_source.inference_src);
+       shared_ptr<MetaInfo> metaInfo = _image_classification->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _image_classification->perform<unsigned char>(_source.inference_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _image_classification->perform<float>(_source.inference_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &ImageClassificationAdapter<T, V>::getOutput()
index 53c009d..26c1ddd 100644 (file)
@@ -249,7 +249,7 @@ public:
                 */
        int run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects);
 
-       int run(std::vector<std::vector<float> > &input_tensors);
+       template<typename T> int run(std::vector<std::vector<T> > &input_tensors);
 
        int run();
 
index aa9b53e..437093b 100644 (file)
@@ -978,7 +978,7 @@ int Inference::run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle
        return convertOutputDataTypeToFloat();
 }
 
-int Inference::run(std::vector<std::vector<float> > &input_tensors)
+template<typename T> int Inference::run(std::vector<std::vector<T> > &input_tensors)
 {
        int ret = INFERENCE_ENGINE_ERROR_NONE;
 
@@ -1007,8 +1007,8 @@ int Inference::run(std::vector<std::vector<float> > &input_tensors)
 
        for (auto &buffer : mInputTensorBuffers.getIETensorBuffer()) {
                inference_engine_tensor_buffer &tensor_buffer = buffer.second;
-               std::vector<float> &input_tensor = input_tensors[buffer_idx];
-               const size_t input_tensor_size = input_tensor.size() * sizeof(float);
+               std::vector<T> &input_tensor = input_tensors[buffer_idx];
+               const size_t input_tensor_size = input_tensor.size() * sizeof(T);
 
                if (tensor_buffer.size != input_tensor_size) {
                        LOGE("Raw buffer size is invalid.(%zu vs %zu)", tensor_buffer.size, input_tensor_size);
@@ -1720,5 +1720,8 @@ int Inference::getPoseLandmarkDetectionResults(std::unique_ptr<mv_inference_pose
        return MEDIA_VISION_ERROR_NONE;
 }
 
+template int Inference::run<float>(std::vector<std::vector<float> > &input_tensors);
+template int Inference::run<unsigned char>(std::vector<std::vector<unsigned char> > &input_tensors);
+
 } /* Inference */
 } /* MediaVision */
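
Note: run() is now a member function template defined in Inference.cpp, so
the explicit instantiations above are what keep callers in other translation
units linking; only the two supported input data types are instantiated. A
caller-side sketch, modeled on the face_recognition.cpp hunk earlier in this
patch:

    // Only the float and unsigned char instantiations exist, matching
    // MV_INFERENCE_DATA_FLOAT32 and MV_INFERENCE_DATA_UINT8 models.
    std::vector<std::vector<float> > input_tensors = { input_vec };
    int ret = _internal->run<float>(input_tensors);
    if (ret != INFERENCE_ENGINE_ERROR_NONE)
        LOGE("fail to inference internal model.");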
index 89b718b..a907e4d 100644 (file)
@@ -59,6 +59,9 @@ protected:
 
        void getOutputNames(std::vector<std::string> &names);
        void getOutputTensor(std::string target_name, std::vector<float> &tensor);
+       template<typename T>
+       void preprocess(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
+       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
 
 public:
        LandmarkDetection(LandmarkDetectionTaskType task_type);
@@ -70,11 +73,11 @@ public:
        void getEngineType(unsigned int engine_index, char **engine_type);
        void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
        void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
+       std::shared_ptr<MetaInfo> getInputMetaInfo();
        void parseMetaFile(const char *meta_file_name);
        void configure();
        void prepare();
-       void preprocess(mv_source_h &mv_src);
-       void inference(mv_source_h source);
+       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
        virtual LandmarkDetectionResult &result() = 0;
 };
 
index b633370..59a9712 100644 (file)
@@ -131,8 +131,13 @@ template<typename T, typename V> void FacialLandmarkAdapter<T, V>::setInput(T &t
 
 template<typename T, typename V> void FacialLandmarkAdapter<T, V>::perform()
 {
-       _landmark_detection->preprocess(_source.inference_src);
-       _landmark_detection->inference(_source.inference_src);
+       shared_ptr<MetaInfo> metaInfo = _landmark_detection->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _landmark_detection->perform<unsigned char>(_source.inference_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _landmark_detection->perform<float>(_source.inference_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &FacialLandmarkAdapter<T, V>::getOutput()
index f19c857..bb83770 100644 (file)
@@ -263,34 +263,53 @@ void LandmarkDetection::prepare()
                throw InvalidOperation("Fail to load model files.");
 }
 
-void LandmarkDetection::preprocess(mv_source_h &mv_src)
+shared_ptr<MetaInfo> LandmarkDetection::getInputMetaInfo()
 {
-       LOGI("ENTER");
+       TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
+       IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();
 
-       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-       vector<mv_source_h> mv_srcs = { mv_src };
+       // TODO. consider using multiple tensors later.
+       if (tensor_info_map.size() != 1)
+               throw InvalidOperation("Invalid input tensor count.");
 
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+       auto tensor_buffer_iter = tensor_info_map.begin();
 
-       LOGI("LEAVE");
+       // Get the meta information corresponding to a given input tensor name.
+       return _parser->getInputMetaMap()[tensor_buffer_iter->first];
 }
 
-void LandmarkDetection::inference(mv_source_h source)
+template<typename T>
+void LandmarkDetection::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
 {
        LOGI("ENTER");
 
-       vector<mv_source_h> sources;
+       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+
+       LOGI("LEAVE");
+}
 
-       sources.push_back(source);
+template<typename T> void LandmarkDetection::inference(vector<vector<T> > &inputVectors)
+{
+       LOGI("ENTER");
 
-       int ret = _inference->run();
+       int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");
 
        LOGI("LEAVE");
 }
 
+template<typename T> void LandmarkDetection::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
+{
+       vector<T> inputVector;
+
+       preprocess<T>(mv_src, metaInfo, inputVector);
+
+       vector<vector<T> > inputVectors = { inputVector };
+
+       inference<T>(inputVectors);
+}
+
 void LandmarkDetection::getOutputNames(vector<string> &names)
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
@@ -313,5 +332,15 @@ void LandmarkDetection::getOutputTensor(string target_name, vector<float> &tenso
        copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
 }
 
+template void LandmarkDetection::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                  vector<float> &inputVector);
+template void LandmarkDetection::inference<float>(vector<vector<float> > &inputVectors);
+template void LandmarkDetection::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
+template void LandmarkDetection::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                                  vector<unsigned char> &inputVector);
+template void LandmarkDetection::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
+template void LandmarkDetection::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
 }
 }
\ No newline at end of file
index 3dfce33..e88d6f7 100644 (file)
@@ -131,8 +131,13 @@ template<typename T, typename V> void PoseLandmarkAdapter<T, V>::setInput(T &t)
 
 template<typename T, typename V> void PoseLandmarkAdapter<T, V>::perform()
 {
-       _landmark_detection->preprocess(_source.inference_src);
-       _landmark_detection->inference(_source.inference_src);
+       shared_ptr<MetaInfo> metaInfo = _landmark_detection->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _landmark_detection->perform<unsigned char>(_source.inference_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _landmark_detection->perform<float>(_source.inference_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &PoseLandmarkAdapter<T, V>::getOutput()
index 584a243..7479a9d 100644 (file)
@@ -49,7 +49,8 @@ public:
        {}
        ~Preprocess() = default;
 
-       void run(std::vector<mv_source_h> &mv_srcs, MetaMap &tensorMetaInfo, IETensorBuffer &tensorBufferMap);
+       //void run(std::vector<mv_source_h> &mv_srcs, MetaMap &tensorMetaInfo, IETensorBuffer &tensorBufferMap);
+       template<typename T> void run(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
 
        std::vector<unsigned int> &getImageWidth()
        {
index e31b16a..5b66500 100644 (file)
@@ -198,6 +198,8 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
        JsonNode *preprocess_node = json_object_get_member(in_obj, "preprocess");
        JsonObject *preprocess_object = json_node_get_object(preprocess_node);
 
+       metaInfo->decodingTypeMap[DecodingType::NORMAL] = NULL;
+
        if (json_object_has_member(preprocess_object, "normalization")) {
                JsonNode *node = json_object_get_member(preprocess_object, "normalization");
                JsonObject *object = json_node_get_object(node);
@@ -224,6 +226,8 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
                metaInfo->decodingTypeMap[DecodingType::NORMAL] = static_pointer_cast<void>(normalization);
        }
 
+       metaInfo->decodingTypeMap[DecodingType::QUANTIZATION] = NULL;
+
        if (json_object_has_member(preprocess_object, "quantization")) {
                JsonNode *node = json_object_get_member(preprocess_object, "quantization");
                JsonObject *object = json_node_get_object(node);
index 3a953a2..aab2ce2 100644 (file)
@@ -98,10 +98,12 @@ int Preprocess::convertToCv(int given_type, int ch)
 
        switch (given_type) {
        case INFERENCE_TENSOR_DATA_TYPE_UINT8:
+       case MV_INFERENCE_DATA_UINT8:
                LOGI("Type is %d ch with UINT8", ch);
                type = ch == 1 ? CV_8UC1 : CV_8UC3;
                break;
        case INFERENCE_TENSOR_DATA_TYPE_FLOAT32:
+       case MV_INFERENCE_DATA_FLOAT32:
                LOGI("Type is %d ch with FLOAT32", ch);
                type = ch == 1 ? CV_32FC1 : CV_32FC3;
                break;
@@ -144,64 +146,63 @@ void Preprocess::convertToCvSource(vector<mv_source_h> &mv_srcs, vector<cv::Mat>
        }
 }
 
-void Preprocess::run(vector<mv_source_h> &mv_srcs, MetaMap &tensorMetaInfo, IETensorBuffer &tensorBufferMap)
+template<typename T> void Preprocess::run(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
 {
        LOGI("ENTER");
 
        vector<cv::Mat> oriCvSources;
-       unsigned int src_idx = 0;
+       vector<mv_source_h> mv_srcs = { mv_src };
 
        _vImageWidth.clear();
        _vImageHeight.clear();
        convertToCvSource(mv_srcs, oriCvSources);
 
-       for (auto &it : tensorBufferMap) {
-               inference_engine_tensor_buffer &tensor_buffer = it.second;
-               mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
-               int ret = mv_source_get_colorspace(mv_srcs[src_idx], &colorspace);
-               if (ret != MEDIA_VISION_ERROR_NONE)
-                       throw InvalidOperation("Fail to get color space.");
-
-               shared_ptr<MetaInfo> metaInfo = tensorMetaInfo[it.first];
-               int data_type = convertToCv(tensor_buffer.data_type, metaInfo->getChannel());
+       inputVector.resize(metaInfo->getHeight() * metaInfo->getWidth() * metaInfo->getChannel());
 
-               // dest is a wrapper of the buffer
-               cv::Mat dest(cv::Size(metaInfo->getWidth(), metaInfo->getHeight()), data_type, tensor_buffer.buffer);
+       mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
+       int ret = mv_source_get_colorspace(mv_srcs[0], &colorspace);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get color space.");
 
-               cv::Mat cvSource, cvDest;
+       int data_type = convertToCv(metaInfo->dataType, metaInfo->getChannel());
+       // dest is a wrapper of the buffer.
+       cv::Mat dest(cv::Size(metaInfo->getWidth(), metaInfo->getHeight()), data_type, inputVector.data());
+       cv::Mat cvSource, cvDest;
 
-               // cvSource has new allocation with dest.size()
-               cv::resize(oriCvSources[src_idx++], cvSource, dest.size());
+       // cvSource has new allocation with dest.size()
+       cv::resize(oriCvSources[0], cvSource, dest.size());
 
-               // cvDest has new allocation if it's colorSpace is not RGB888
-               // cvDest share the data with cvSource it's colorSpace is RGB888
-               colorConvert(cvSource, cvDest, colorspace, metaInfo->colorSpace);
+       // cvDest has a new allocation if its colorSpace is not RGB888;
+       // cvDest shares the data with cvSource if its colorSpace is RGB888
+       colorConvert(cvSource, cvDest, colorspace, metaInfo->colorSpace);
 
-               cvDest.convertTo(dest, dest.type());
+       cvDest.convertTo(dest, dest.type());
 
-               try {
-                       auto normalization =
-                                       static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+       try {
+               auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
 
-                       if (normalization && normalization->use)
-                               normalize(dest, dest, normalization->mean, normalization->std);
-               } catch (const std::exception &e) {
-                       LOGI("No normalization node.");
-               }
+               if (normalization && normalization->use)
+                       normalize(dest, dest, normalization->mean, normalization->std);
+       } catch (const std::exception &e) {
+               LOGI("No normalization node.");
+       }
 
-               try {
-                       auto quantization =
-                                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       try {
+               auto quantization =
+                               static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
 
-                       if (quantization && quantization->use)
-                               quantize(dest, dest, quantization->scale, quantization->zeropoint);
-               } catch (const std::exception &e) {
-                       LOGI("No quantization node.");
-               }
+               if (quantization && quantization->use)
+                       quantize(dest, dest, quantization->scale, quantization->zeropoint);
+       } catch (const std::exception &e) {
+               LOGI("No quantization node.");
        }
 
        LOGI("LEAVE");
 }
 
+template void Preprocess::run<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<float> &inputVector);
+template void Preprocess::run<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                        vector<unsigned char> &inputVector);
+
 } /* machine_learning */
 } /* mediavision */
index 8714c4f..6bcbfb0 100644 (file)
@@ -58,6 +58,9 @@ protected:
 
        void getOutputNames(std::vector<std::string> &names);
        void getOutputTensor(std::string target_name, std::vector<float> &tensor);
+       template<typename T>
+       void preprocess(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
+       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
 
 public:
        ObjectDetection(ObjectDetectionTaskType task_type);
@@ -69,11 +72,11 @@ public:
        void getEngineType(unsigned int engine_index, char **engine_type);
        void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
        void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
+       std::shared_ptr<MetaInfo> getInputMetaInfo();
        void parseMetaFile(const char *meta_file_name);
        void configure();
        void prepare();
-       void preprocess(mv_source_h &mv_src);
-       void inference(mv_source_h source);
+       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
        virtual ObjectDetectionResult &result() = 0;
 };
 
index ad5356e..2b6abaa 100644 (file)
@@ -129,8 +129,13 @@ template<typename T, typename V> void FaceDetectionAdapter<T, V>::setInput(T &t)
 
 template<typename T, typename V> void FaceDetectionAdapter<T, V>::perform()
 {
-       _object_detection->preprocess(_source.inference_src);
-       _object_detection->inference(_source.inference_src);
+       shared_ptr<MetaInfo> metaInfo = _object_detection->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _object_detection->perform<unsigned char>(_source.inference_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _object_detection->perform<float>(_source.inference_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &FaceDetectionAdapter<T, V>::getOutput()
index 752439b..e411bb8 100644 (file)
@@ -19,6 +19,7 @@
 #include <map>
 #include <memory>
 #include <algorithm>
+#include <iostream>
 
 #include "machine_learning_exception.h"
 #include "mv_machine_learning_common.h"
@@ -257,34 +258,53 @@ void ObjectDetection::prepare()
                throw InvalidOperation("Fail to load model files.");
 }
 
-void ObjectDetection::preprocess(mv_source_h &mv_src)
+shared_ptr<MetaInfo> ObjectDetection::getInputMetaInfo()
 {
-       LOGI("ENTER");
+       TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
+       IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();
 
-       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-       vector<mv_source_h> mv_srcs = { mv_src };
+       // TODO. consider using multiple tensors later.
+       if (tensor_info_map.size() != 1)
+               throw InvalidOperation("Invalid input tensor count.");
 
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+       auto tensor_buffer_iter = tensor_info_map.begin();
 
-       LOGI("LEAVE");
+       // Get the meta information corresponding to a given input tensor name.
+       return _parser->getInputMetaMap()[tensor_buffer_iter->first];
 }
 
-void ObjectDetection::inference(mv_source_h source)
+template<typename T>
+void ObjectDetection::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
 {
        LOGI("ENTER");
 
-       vector<mv_source_h> sources;
+       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+
+       LOGI("LEAVE");
+}
 
-       sources.push_back(source);
+template<typename T> void ObjectDetection::inference(vector<vector<T> > &inputVectors)
+{
+       LOGI("ENTER");
 
-       int ret = _inference->run();
+       int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");
 
        LOGI("LEAVE");
 }
 
+template<typename T> void ObjectDetection::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
+{
+       vector<T> inputVector;
+
+       preprocess<T>(mv_src, metaInfo, inputVector);
+
+       vector<vector<T> > inputVectors = { inputVector };
+
+       inference<T>(inputVectors);
+}
+
 void ObjectDetection::getOutputNames(vector<string> &names)
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
@@ -307,5 +327,15 @@ void ObjectDetection::getOutputTensor(string target_name, vector<float> &tensor)
        copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
 }
 
+template void ObjectDetection::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                vector<float> &inputVector);
+template void ObjectDetection::inference<float>(vector<vector<float> > &inputVectors);
+template void ObjectDetection::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
+template void ObjectDetection::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                                vector<unsigned char> &inputVector);
+template void ObjectDetection::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
+template void ObjectDetection::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
 }
 }
\ No newline at end of file
index adec879..efd1073 100644 (file)
@@ -135,8 +135,13 @@ template<typename T, typename V> void ObjectDetectionAdapter<T, V>::setInput(T &
 
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::perform()
 {
-       _object_detection->preprocess(_source.inference_src);
-       _object_detection->inference(_source.inference_src);
+       shared_ptr<MetaInfo> metaInfo = _object_detection->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _object_detection->perform<unsigned char>(_source.inference_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _object_detection->perform<float>(_source.inference_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &ObjectDetectionAdapter<T, V>::getOutput()
index 17d7b1f..62e332d 100644 (file)
@@ -57,6 +57,9 @@ protected:
 
        void getOutputNames(std::vector<std::string> &names);
        void getOutputTensor(std::string &target_name, std::vector<float> &tensor);
+       template<typename T>
+       void preprocess(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
+       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
 
 public:
        ObjectDetection3d(ObjectDetection3dTaskType task_type);
@@ -69,11 +72,11 @@ public:
        void getEngineType(unsigned int engine_index, char **engine_type);
        void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
        void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
+       std::shared_ptr<MetaInfo> getInputMetaInfo();
        void parseMetaFile();
        void configure();
        void prepare();
-       void preprocess(mv_source_h &mv_src);
-       void inference(mv_source_h source);
+       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
        virtual ObjectDetection3dResult &result() = 0;
 };
 
index fdad14c..86e49d9 100644 (file)
@@ -222,34 +222,54 @@ void ObjectDetection3d::prepare()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
 }
-void ObjectDetection3d::preprocess(mv_source_h &mv_src)
+
+shared_ptr<MetaInfo> ObjectDetection3d::getInputMetaInfo()
 {
-       LOGI("ENTER");
+       TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
+       IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();
 
-       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-       vector<mv_source_h> mv_srcs = { mv_src };
+       // TODO. consider using multiple tensors later.
+       if (tensor_info_map.size() != 1)
+               throw InvalidOperation("Invalid input tensor count.");
 
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+       auto tensor_buffer_iter = tensor_info_map.begin();
 
-       LOGI("LEAVE");
+       // Get the meta information corresponding to a given input tensor name.
+       return _parser->getInputMetaMap()[tensor_buffer_iter->first];
 }
 
-void ObjectDetection3d::inference(mv_source_h source)
+template<typename T>
+void ObjectDetection3d::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
 {
        LOGI("ENTER");
 
-       vector<mv_source_h> sources;
+       _preprocess.run<T>(mv_src, metaInfo, inputVector);
 
-       sources.push_back(source);
+       LOGI("LEAVE");
+}
+
+template<typename T> void ObjectDetection3d::inference(vector<vector<T> > &inputVectors)
+{
+       LOGI("ENTER");
 
-       int ret = _inference->run();
+       int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");
 
        LOGI("LEAVE");
 }
 
+template<typename T> void ObjectDetection3d::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
+{
+       vector<T> inputVector;
+
+       preprocess<T>(mv_src, metaInfo, inputVector);
+
+       vector<vector<T> > inputVectors = { inputVector };
+
+       inference<T>(inputVectors);
+}
+
 void ObjectDetection3d::getOutputNames(vector<string> &names)
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
@@ -276,5 +296,15 @@ void ObjectDetection3d::getOutputTensor(string &target_name, vector<float> &tens
        LOGI("LEAVE");
 }
 
+template void ObjectDetection3d::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                  vector<float> &inputVector);
+template void ObjectDetection3d::inference<float>(vector<vector<float> > &inputVectors);
+template void ObjectDetection3d::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
+template void ObjectDetection3d::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
+                                                                                                                  vector<unsigned char> &inputVector);
+template void ObjectDetection3d::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
+template void ObjectDetection3d::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
+
 }
 }
\ No newline at end of file
index ad9f51b..0de51a2 100644 (file)
@@ -115,8 +115,13 @@ template<typename T, typename V> void ObjectDetection3dAdapter<T, V>::setInput(T
 
 template<typename T, typename V> void ObjectDetection3dAdapter<T, V>::perform()
 {
-       _object_detection_3d->preprocess(_source.inference_src);
-       _object_detection_3d->inference(_source.inference_src);
+       shared_ptr<MetaInfo> metaInfo = _object_detection_3d->getInputMetaInfo();
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               _object_detection_3d->perform<unsigned char>(_source.inference_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               _object_detection_3d->perform<float>(_source.inference_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
 }
 
 template<typename T, typename V> V &ObjectDetection3dAdapter<T, V>::getOutput()