mv_machine_learning: add TinyTrackerS model support 00/318100/2 sandbox/inki.dae/10.0_acr
author Inki Dae <inki.dae@samsung.com>
Tue, 22 Oct 2024 23:54:59 +0000 (08:54 +0900)
committer Inki Dae <inki.dae@samsung.com>
Mon, 13 Jan 2025 23:16:41 +0000 (08:16 +0900)
Add TinyTrackerS model support which is one of Gaze Estimation models,
and a light-weight version for embedded devices.

This model needs the int8 data type for its input tensor, so this patch does the following:
 - add int8 tensor data type support
 - update normalize() and quantize() for supporting int8 tensor data type
 - change the default model for the Gaze Tracking task group to TinyTrackerS

Change-Id: I73c0b519fa0eccdd82ece2ef38c4748d26f26331
Signed-off-by: Inki Dae <inki.dae@samsung.com>
14 files changed:
include/mv_inference_type.h
mv_machine_learning/common/meta/include/MvMlPreprocess.h
mv_machine_learning/common/meta/src/MetaParser.cpp
mv_machine_learning/common/meta/src/MvMlPreprocess.cpp
mv_machine_learning/gaze_tracking/include/GazeTrackingAdapter.h
mv_machine_learning/gaze_tracking/include/TinyTracker.h [new file with mode: 0644]
mv_machine_learning/gaze_tracking/include/gaze_tracking_type.h
mv_machine_learning/gaze_tracking/meta/gaze_tracking.json
mv_machine_learning/gaze_tracking/src/GGazeEstimation.cpp
mv_machine_learning/gaze_tracking/src/GazeTracking.cpp
mv_machine_learning/gaze_tracking/src/GazeTrackingAdapter.cpp
mv_machine_learning/gaze_tracking/src/L2CSNet.cpp
mv_machine_learning/gaze_tracking/src/TinyTracker.cpp [new file with mode: 0644]
mv_machine_learning/inference/src/Inference.cpp

index b3b29f7d0c87c1d8596e37aa872dc024bcb44eac..b337ee96ed0c4f1d6061bdbb20480306c636444b 100644 (file)
@@ -96,7 +96,8 @@ typedef enum {
  */
 typedef enum {
        MV_INFERENCE_DATA_FLOAT32 = 0, /**< Data type of a given pre-trained model is float. */
-       MV_INFERENCE_DATA_UINT8 /**< Data type of a given pre-trained model is unsigned char. */
+       MV_INFERENCE_DATA_UINT8, /**< Data type of a given pre-trained model is unsigned char. */
+       MV_INFERENCE_DATA_INT8 /**< Data type of a given pre-trained model is signed char. (Since 10.0) */
 } mv_inference_data_type_e;
 
 /**
index 1d2f58d7b0eba441ff881893e59fd63487c4c726..20128df715df6de1e52cc4145e346b18cb847ace 100644 (file)
@@ -82,9 +82,8 @@ private:
        int convertToCv(int given_type, int ch);
        void colorConvert(cv::Mat &source, cv::Mat &dest, int sType, int dType);
        void convertToCvSource(std::vector<mv_source_h> &mv_srcs, std::vector<cv::Mat> &cv_srcs);
-       void normalize(cv::Mat &source, cv::Mat &dest, const std::vector<double> &mean, const std::vector<double> &std);
-       void quantize(cv::Mat &source, cv::Mat &dest, const std::vector<double> &scale,
-                                 const std::vector<double> &zeropoint);
+       void normalize(cv::Mat &source, cv::Mat &dest, bool signedInt8 = false);
+       void quantize(cv::Mat &source, cv::Mat &dest, bool signedInt8 = false);
 };
 
 } /* machine_learning */
index e85056ff6284f095b85c50f89d0ee58e5f05c48f..d3134000221395eee9152d0317ecea17e0a53f31 100644 (file)
@@ -33,7 +33,8 @@ std::map<std::string, inference_tensor_shape_type_e> gSupportedShapeType = { { "
                                                                                                                                                         { "NHWC", INFERENCE_TENSOR_SHAPE_NHWC } };
 
 std::map<std::string, mv_inference_data_type_e> gSupportedDataType = { { "FLOAT32", MV_INFERENCE_DATA_FLOAT32 },
-                                                                                                                                          { "UINT8", MV_INFERENCE_DATA_UINT8 } };
+                                                                                                                                          { "UINT8", MV_INFERENCE_DATA_UINT8 },
+                                                                                                                                          { "INT8", MV_INFERENCE_DATA_INT8 } };
 
 std::map<std::string, mv_colorspace_e> gSupportedColorType = { { "RGB888", MEDIA_VISION_COLORSPACE_RGB888 },
                                                                                                                           { "GRAY8", MEDIA_VISION_COLORSPACE_Y800 } };
index fe849990bb760023d62de7f7bb5eb8aef91f8cb5..fb72bf65543439184da9752a2af9deb10d17089e 100644 (file)
@@ -62,14 +62,23 @@ void Preprocess::colorConvert(cv::Mat &source, cv::Mat &dest, int sType, int dTy
        LOGI("LEAVE");
 }
 
-void Preprocess::normalize(cv::Mat &source, cv::Mat &dest, const vector<double> &mean, const vector<double> &std)
+void Preprocess::normalize(cv::Mat &source, cv::Mat &dest, bool signedInt8)
 {
        LOGI("ENTER");
 
        try {
-               cv::subtract(source, cv::Scalar(mean[0], mean[1], mean[2]), dest);
-               source = dest;
-               cv::divide(source, cv::Scalar(std[0], std[1], std[2]), dest);
+               if (!signedInt8) {
+                       cv::subtract(source, cv::Scalar(_config.mean[0], _config.mean[1], _config.mean[2]), dest);
+                       source = dest;
+                       cv::divide(source, cv::Scalar(_config.std[0], _config.std[1], _config.std[2]), dest);
+               } else {
+                       std::vector<float> fData;
+
+                       for (int idx = 0; idx < source.cols * source.rows; ++idx) {
+                               fData.push_back(static_cast<float>(source.data[idx]) - _config.mean[0]);
+                               dest.data[idx] = static_cast<char>(static_cast<int>(fData[idx]));
+                       }
+               }
        } catch (cv::Exception &e) {
                throw InvalidOperation("Fail to substract/divide");
        }
@@ -77,14 +86,23 @@ void Preprocess::normalize(cv::Mat &source, cv::Mat &dest, const vector<double>
        LOGI("LEAVE");
 }
 
-void Preprocess::quantize(cv::Mat &source, cv::Mat &dest, const vector<double> &scale, const vector<double> &zeropoint)
+void Preprocess::quantize(cv::Mat &source, cv::Mat &dest, bool signedInt8)
 {
        LOGI("ENTER");
 
        try {
-               cv::subtract(source, cv::Scalar(zeropoint[0], zeropoint[1], zeropoint[2]), dest);
-               source = dest;
-               cv::multiply(source, cv::Scalar(scale[0], scale[1], scale[2]), dest);
+               if (!signedInt8) {
+                       cv::subtract(source, cv::Scalar(_config.zeropoint[0], _config.zeropoint[1], _config.zeropoint[2]), dest);
+                       source = dest;
+                       cv::multiply(source, cv::Scalar(_config.scale[0], _config.scale[1], _config.scale[2]), dest);
+               } else {
+                       std::vector<float> fData;
+
+                       for (int idx = 0; idx < source.cols * source.rows; ++idx) {
+                               fData.push_back(static_cast<float>(source.data[idx]) - _config.zeropoint[0]);
+                               dest.data[idx] = fData[idx] * _config.scale[0];
+                       }
+               }
        } catch (cv::Exception &e) {
                throw InvalidOperation("Fail to subtract/multiply");
        }
@@ -97,20 +115,21 @@ int Preprocess::convertToCv(int given_type, int ch)
        int type = 0;
 
        switch (given_type) {
-       case INFERENCE_TENSOR_DATA_TYPE_UINT8:
        case MV_INFERENCE_DATA_UINT8:
                LOGI("Type is %d ch with UINT8", ch);
                type = ch == 1 ? CV_8UC1 : CV_8UC3;
                break;
-       case INFERENCE_TENSOR_DATA_TYPE_FLOAT32:
+       case MV_INFERENCE_DATA_INT8:
+               LOGI("Type is %d ch with INT8", ch);
+               type = ch == 1 ? CV_8SC1 : CV_8SC3;
+               break;
        case MV_INFERENCE_DATA_FLOAT32:
                LOGI("Type is %d ch with FLOAT32", ch);
                type = ch == 1 ? CV_32FC1 : CV_32FC3;
                break;
        default:
-               LOGI("unknown data type so FLOAT32 data type will be used in default");
-               type = ch == 1 ? CV_32FC1 : CV_32FC3;
-               break;
+               LOGE("It's unknown data type(%d)", given_type);
+               throw InvalidParameter("Unknown data type.");
        }
 
        return type;
@@ -163,8 +182,8 @@ template<typename T> void Preprocess::run(mv_source_h &mv_src, vector<T> &inputV
        convertToCvSource(mv_srcs, oriCvSources);
 
        inputVector.resize(_config.output_height * _config.output_width * _config.output_channel);
-
        int data_type = convertToCv(_config.output_data_type, _config.output_channel);
+
        // dest is a wrapper of the buffer.
        cv::Mat dest(cv::Size(_config.output_width, _config.output_height), data_type, inputVector.data());
        cv::Mat cvSource, cvDest;
@@ -172,6 +191,8 @@ template<typename T> void Preprocess::run(mv_source_h &mv_src, vector<T> &inputV
        // cvSource has new allocation with dest.size()
        cv::resize(oriCvSources[0], cvSource, dest.size());
 
+       bool signedInt8DataType = (data_type == CV_8SC1 || data_type == CV_8SC3);
+
        if (_config.skip_csc) {
                cvSource.convertTo(dest, dest.type());
        } else {
@@ -183,20 +204,35 @@ template<typename T> void Preprocess::run(mv_source_h &mv_src, vector<T> &inputV
                // cvDest is allocated if colorspace is not RGB888, and
                // cvDest shares the data with cvSource if the colorspace is RGB888.
                colorConvert(cvSource, cvDest, colorspace, _config.output_format);
-               cvDest.convertTo(dest, dest.type());
+
+               // Convert data to a given data type only in case that the output data type isn't signed int8 type.
+               // If the output data type is signed int8 type, it should be converted after normalization
+               // because the normalized value can be negative. The range of signed int8 data type is -128 ~ 127
+       // so the pixel values greater than 127 cannot be converted correctly.
+               if (!signedInt8DataType)
+                       cvDest.convertTo(dest, dest.type());
        }
 
-       if (_config.normalize)
-               normalize(dest, dest, _config.mean, _config.std);
+       if (_config.normalize) {
+               if (!signedInt8DataType)
+                       normalize(dest, dest);
+               else
+                       normalize(cvDest, dest, true);
+       }
 
-       if (_config.quantize)
-               quantize(dest, dest, _config.scale, _config.zeropoint);
+       if (_config.quantize) {
+               if (!signedInt8DataType || _config.normalize)
+                       quantize(dest, dest);
+               else // If the output data type is signed int8 type then dest is empty and cvDest is used instead of dest.
+                       quantize(cvDest, dest, true);
+       }
 
        LOGI("LEAVE");
 }
 
 template void Preprocess::run<float>(mv_source_h &mv_src, vector<float> &inputVector);
 template void Preprocess::run<unsigned char>(mv_source_h &mv_src, vector<unsigned char> &inputVector);
+template void Preprocess::run<char>(mv_source_h &mv_src, vector<char> &inputVector);
 
 } /* machine_learning */
 } /* mediavision */
index 136d47414f35e08482a090568ed9071eba33b00c..6a27c4f0f811700cabcbc84896690e6540fd96d8 100644 (file)
@@ -20,9 +20,9 @@
 #include <dlog.h>
 
 #include "EngineConfig.h"
+#include "IGazeTracking.h"
 #include "ITask.h"
 #include "MvMlConfig.h"
-#include "IGazeTracking.h"
 namespace mediavision
 {
 namespace machine_learning
diff --git a/mv_machine_learning/gaze_tracking/include/TinyTracker.h b/mv_machine_learning/gaze_tracking/include/TinyTracker.h
new file mode 100644 (file)
index 0000000..245ed74
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TinyTracker_H__
+#define __TinyTracker_H__
+
+#include "mv_private.h"
+#include <memory>
+#include <mv_common.h>
+#include <string>
+
+#include "GazeTracking.h"
+#include <mv_inference_type.h>
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T> class TinyTracker : public GazeTracking<T>
+{
+       using GazeTracking<T>::_config;
+       using GazeTracking<T>::_preprocess;
+
+private:
+       GazeTrackingResult _result;
+
+public:
+       TinyTracker(GazeTrackingTaskType task_type, std::shared_ptr<Config> config);
+       ~TinyTracker();
+
+       GazeTrackingResult &result() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
index ea462f12e5a9365697527aaa8bd07b49945b8b9a..0f26953a31e36f47019030a9804bad48a5b017d9 100644 (file)
@@ -49,7 +49,8 @@ struct GazeTrackingResult : public OutputBaseType {
 enum class GazeTrackingTaskType {
        GAZE_TRACKINGION_TASK_NONE = 0,
        L2CS_NET,
-       G_GAZE_ESTIMATION
+       G_GAZE_ESTIMATION,
+       TINY_TRACKER
        // TODO
 };
 
index d42ec2fe1617ba608e20d0de99ce9cfd803e0891..4236faf59f8e951620c276ebff8d6e27a4ce6558 100644 (file)
@@ -9,17 +9,17 @@
                {
             "name"  : "MODEL_FILE_NAME",
             "type"  : "string",
-            "value" : "generalizing_gaze_estimation_with_weak_supervision_from_synthetic_views_160x160_float16.tflite"
+            "value" : "TinyTrackerS.tflite"
         },
         {
             "name"  : "DEFAULT_MODEL_NAME",
             "type"  : "string",
-            "value" : "G_GAZE_ESTIMATION"
+            "value" : "TINY_TRACKER"
         },
         {
             "name"  : "MODEL_META_FILE_NAME",
             "type"  : "string",
-            "value" : "generalizing_gaze_estimation_with_weak_supervision_from_synthetic_views_160x160_float16.json"
+            "value" : "TinyTrackerS.json"
         },
         {
             "name"  : "BACKEND_TYPE",
index 3b9a87874f648d65e9ba60b4b32b6fb3ca240117..94a13aaf17df8557cf053d47f94deaa4f08e6689 100644 (file)
@@ -62,6 +62,7 @@ template<typename T> GazeTrackingResult &GGazeEstimation<T>::result()
 
 template class GGazeEstimation<float>;
 template class GGazeEstimation<unsigned char>;
+template class GGazeEstimation<char>;
 
 }
 }
index 455ab24c776fafb7c87bb67543d9eb076d56eb3b..b28dc11c6f0af5d562c2b3f813c25649d71387ef 100644 (file)
@@ -311,6 +311,7 @@ template<typename T> void GazeTracking<T>::getOutputTensor(string target_name, v
 
 template class GazeTracking<float>;
 template class GazeTracking<unsigned char>;
+template class GazeTracking<char>;
 
 }
 }
index 775a6ab81d56dcce4fab1fb87afa1724aa9de32d..89c64e2c3465b942e085eb333e06f61084f2726e 100644 (file)
  */
 
 #include "GazeTrackingAdapter.h"
+#include "GGazeEstimation.h"
+#include "L2CSNet.h"
 #include "MvMlException.h"
+#include "TinyTracker.h"
 #include "gaze_tracking_type.h"
 #include "mv_gaze_tracking_config.h"
-#include "L2CSNet.h"
-#include "GGazeEstimation.h"
 
 using namespace std;
 using namespace MediaVision::Common;
@@ -57,6 +58,9 @@ template<typename U> void GazeTrackingAdapter::create(GazeTrackingTaskType task_
        case GazeTrackingTaskType::G_GAZE_ESTIMATION:
                _gaze_tracking = make_unique<GGazeEstimation<U> >(task_type, _config);
                break;
+       case GazeTrackingTaskType::TINY_TRACKER:
+               _gaze_tracking = make_unique<TinyTracker<U> >(task_type, _config);
+               break;
        default:
                throw InvalidOperation("Invalid gaze tracking task type.");
        }
@@ -74,6 +78,9 @@ void GazeTrackingAdapter::create(const string &model_name)
        case MV_INFERENCE_DATA_UINT8:
                create<unsigned char>(task_type);
                break;
+       case MV_INFERENCE_DATA_INT8:
+               create<char>(task_type);
+               break;
        case MV_INFERENCE_DATA_FLOAT32:
                create<float>(task_type);
                break;
@@ -93,6 +100,8 @@ GazeTrackingTaskType GazeTrackingAdapter::convertToTaskType(string model_name)
                return GazeTrackingTaskType::L2CS_NET;
        if (model_name == "G_GAZE_ESTIMATION")
                return GazeTrackingTaskType::G_GAZE_ESTIMATION;
+       if (model_name == "TINY_TRACKER")
+               return GazeTrackingTaskType::TINY_TRACKER;
        // TODO.
 
        throw InvalidParameter("Invalid gaze tracking model name.");
index ec537149e365f74f968eb0afc0d6d5895b36b86d..19fc3892fb6d50565f0a1e36212341180b557907 100644 (file)
@@ -70,6 +70,6 @@ template<typename T> GazeTrackingResult &L2CSNet<T>::result()
 
 template class L2CSNet<float>;
 template class L2CSNet<unsigned char>;
-
+template class L2CSNet<char>;
 }
 }
diff --git a/mv_machine_learning/gaze_tracking/src/TinyTracker.cpp b/mv_machine_learning/gaze_tracking/src/TinyTracker.cpp
new file mode 100644 (file)
index 0000000..4bae8e6
--- /dev/null
@@ -0,0 +1,80 @@
+/**
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <string.h>
+
+#include "MvMlException.h"
+#include "Postprocess.h"
+#include "TinyTracker.h"
+#include "mv_gaze_tracking_config.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T>
+TinyTracker<T>::TinyTracker(GazeTrackingTaskType task_type, std::shared_ptr<Config> config)
+               : GazeTracking<T>(task_type, config), _result()
+{}
+
+template<typename T> TinyTracker<T>::~TinyTracker()
+{}
+
+template<typename T> GazeTrackingResult &TinyTracker<T>::result()
+{
+       // Clear the _result object because the result() function can be called whenever the user wants,
+       // so make sure to clear existing result data before getting the data again.
+       _result = GazeTrackingResult();
+
+       vector<string> names;
+
+       GazeTracking<T>::getOutputNames(names);
+
+       vector<float> outputTensor;
+
+       LOGD("names size = %zu", names.size());
+       for (unsigned int idx = 0; idx < names.size(); ++idx)
+               LOGD("%s", names[idx].c_str());
+
+       GazeTracking<T>::getOutputTensor(names[0], outputTensor);
+       float x = outputTensor[0];
+       float y = outputTensor[1];
+
+       LOGD("TinyTracker: x: %f, y: %f", x, y);
+
+       _result.frame_number++;
+       _result.number_of_faces = 1;
+       _result.x_pos.push_back(x * 10.0f);
+       _result.y_pos.push_back(y * 10.0f);
+       _result.yaws.push_back(0.0f);
+       _result.pitches.push_back(0.0f);
+
+       return _result;
+}
+
+template class TinyTracker<float>;
+template class TinyTracker<unsigned char>;
+template class TinyTracker<char>;
+
+}
+}
index 69145f3af2e43a2e1b3e00054acc4c7452e11370..5b7fae3721e1fb3a9c9af417ff34c5a1add162fa 100644 (file)
@@ -196,6 +196,10 @@ int Inference::convertToCv(int given_type)
                LOGI("Type is %d ch with UINT8", ch);
                type = ch == 1 ? CV_8UC1 : CV_8UC3;
                break;
+       case INFERENCE_TENSOR_DATA_TYPE_INT8:
+               LOGI("Type is %d ch with INT8", ch);
+               type = ch == 1 ? CV_8SC1 : CV_8SC3;
+               break;
        case INFERENCE_TENSOR_DATA_TYPE_FLOAT32:
                LOGI("Type is %d ch with FLOAT32", ch);
                type = ch == 1 ? CV_32FC1 : CV_32FC3;
@@ -220,6 +224,9 @@ inference_tensor_data_type_e Inference::convertToIE(int given_type)
        case MV_INFERENCE_DATA_UINT8:
                type = INFERENCE_TENSOR_DATA_TYPE_UINT8;
                break;
+       case MV_INFERENCE_DATA_INT8:
+               type = INFERENCE_TENSOR_DATA_TYPE_INT8;
+               break;
        default:
                LOGI("unknown data type so FLOAT32 data type will be used in default");
                break;
@@ -1722,6 +1729,7 @@ int Inference::getPoseLandmarkDetectionResults(std::unique_ptr<mv_inference_pose
 
 template int Inference::run<float>(std::vector<std::vector<float> > &input_tensors);
 template int Inference::run<unsigned char>(std::vector<std::vector<unsigned char> > &input_tensors);
+template int Inference::run<char>(std::vector<std::vector<char> > &input_tensors);
 
 } /* Inference */
 } /* MediaVision */