mv_machine_learning: fix typo
[platform/core/api/mediavision.git] / mv_machine_learning / mv_inference / inference / include / Inference.h
1 /**
2  * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __MEDIA_VISION_INFERENCE_H__
18 #define __MEDIA_VISION_INFERENCE_H__
19
20 #include <string>
21 #include <map>
22
23 #include "mv_common.h"
24 #include "inference_engine_error.h"
25 #include "inference_engine_common_impl.h"
26 #include "mv_inference_private.h"
27 #include <mv_inference_type.h>
28 #include <opencv2/core.hpp>
29 #include <opencv2/imgproc.hpp>
30 #include "Metadata.h"
31 #include "PreProcess.h"
32 #include "PostProcess.h"
33 #include "TensorBuffer.h"
34
35 #include "Landmark.h"
36
37 #define HUMAN_POSE_MAX_LANDMARKS 16
38 #define HUMAN_POSE_MAX_PARTS 6
39
/**
 * @file Inference.h
 * @brief This file contains the definition of the Inference class, which
 *        provides the inference interface of the media vision framework.
 */
45 using namespace InferenceEngineInterface::Common;
46
/**
 * @brief Result set of an image classification request.
 *        The three vectors are parallel: entry i of @a indices, @a names and
 *        @a confidences describes the i-th detected class.
 *
 * Note: the struct tag was previously misspelled `_ImageClassficationResults`;
 * the spelling is fixed here. The typedef name used by callers is unchanged.
 */
typedef struct _ImageClassificationResults {
	int number_of_classes; /**< Number of classes contained in the vectors below */
	std::vector<int> indices; /**< Class indices into the label (user) file */
	std::vector<std::string> names; /**< Human-readable class names */
	std::vector<float> confidences; /**< Confidence score per class */
} ImageClassificationResults; /**< structure ImageClassificationResults */
53
/**
 * @brief Result set of an object detection request.
 *        The vectors are parallel: entry i of each vector describes the
 *        i-th detected object.
 */
typedef struct _ObjectDetectionResults {
	int number_of_objects; /**< Number of detected objects */
	std::vector<int> indices; /**< Class index per detected object */
	std::vector<std::string> names; /**< Human-readable class name per object */
	std::vector<float> confidences; /**< Confidence score per object */
	std::vector<cv::Rect> locations; /**< Bounding box per object */
} ObjectDetectionResults; /**< structure ObjectDetectionResults */
61
/**
 * @brief Result set of a face detection request.
 *        The vectors are parallel: entry i of each vector describes the
 *        i-th detected face.
 */
typedef struct _FaceDetectionResults {
	int number_of_faces; /**< Number of detected faces */
	std::vector<float> confidences; /**< Confidence score per face */
	std::vector<cv::Rect> locations; /**< Bounding box per face */
} FaceDetectionResults; /**< structure FaceDetectionResults */
67
/**
 * @brief Result set of a facial landmark detection request.
 */
typedef struct _FacialLandMarkDetectionResults {
	int number_of_landmarks; /**< Number of detected landmarks */
	std::vector<cv::Point> locations; /**< Pixel position per landmark */
} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
72
/**
 * @brief Result set of a pose landmark detection request.
 *        @a locations and @a score are parallel vectors.
 */
typedef struct _PoseLandmarkDetectionResults {
	int number_of_landmarks; /**< Number of detected pose landmarks */
	std::vector<cv::Point2f> locations; /**< Sub-pixel position per landmark */
	std::vector<float> score; /**< Confidence score per landmark */
} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */
78
79 namespace mediavision
80 {
81 namespace inference
82 {
	/**
	 * @brief Geometry of the model input tensor
	 *        (set through ConfigureTensorInfo()/ConfigureInputInfo()).
	 */
	struct TensorInfo {
		int width; /**< Input tensor width */
		int height; /**< Input tensor height */
		int dim; /**< presumably the number of tensor dimensions — TODO confirm against the .cpp */
		int ch; /**< Number of channels */
	};
89
	/**
	 * @brief Aggregated configuration of an inference session: model file
	 *        paths, input tensor description, backend/target selection and
	 *        post-processing parameters.
	 */
	struct InferenceConfig {
		/**
		 * @brief Default constructor for the @ref InferenceConfig
		 *
		 * @since_tizen 5.0
		 */
		InferenceConfig();

		std::string mConfigFilePath; /**< Path of a model configuration file */

		std::string mWeightFilePath; /**< Path of a model weight file */

		std::string mUserFilePath; /**< Path of a model user (label) file */

		TensorInfo mTensorInfo; /**< Input tensor information */

		mv_inference_data_type_e mDataType; /**< Data type of an input tensor */

		/** Backend type of the model files.
		 *  NOTE(review): the member name keeps a legacy misspelling
		 *  ("Backed"); renaming it requires updating the .cpp as well. */
		mv_inference_backend_type_e mBackedType;

		int mTargetTypes; /**< Target device type(s) to run inference on */

		double mConfidenceThresHold; /**< Confidence threshold value */

		double mMeanValue; /**< The mean value for normalization */

		double mStdValue; /**< The scale factor value for normalization */

		int mMaxOutputNumbers; /**< Maximum number of results to report */

		std::vector<std::string> mInputLayerNames; /**< The input layer names */
		std::vector<std::string> mOutputLayerNames; /**< The output layer names */
	};
123
	/**
	 * @brief Drives a model inference session: configures model files,
	 *        input/output tensors, backend and target device, runs a forward
	 *        pass on media vision sources, and converts the raw output
	 *        tensors into task-specific result structures.
	 */
	class Inference
	{
	public:
		/**
		 * @brief   Creates an Inference class instance.
		 *
		 * @since_tizen 5.5
		 */
		Inference();

		/**
		 * @brief   Destroys an Inference class instance including
		 *          all its resources.
		 *
		 * @since_tizen 5.5
		 */
		~Inference();

		/**
		 * @brief   Configure model files
		 *
		 * @since_tizen 5.5
		 */
		void ConfigureModelFiles(const std::string modelConfigFilePath,
								 const std::string modelWeightFilePath,
								 const std::string modelUserFilePath);

		/**
		 * @brief   Configure input tensor information
		 *
		 * @since_tizen 5.5
		 * @remarks deprecated Replaced by ConfigureInputInfo
		 */
		void ConfigureTensorInfo(int width, int height, int dim, int ch,
								 double stdValue, double meanValue);

		/**
		 * @brief Configure input information
		 *
		 * @since_tizen 6.0
		 */
		void ConfigureInputInfo(int width, int height, int dim, int ch,
								double stdValue, double meanValue, int dataType,
								const std::vector<std::string> names);

		/**
		 * @brief Configure the output layer names to read results from.
		 *
		 * @since_tizen 6.0
		 */
		void ConfigureOutputInfo(std::vector<std::string> names);

		/**
		 * @brief   Configure inference backend type.
		 *
		 * @since_tizen 6.0
		 */
		int ConfigureBackendType(const mv_inference_backend_type_e backendType);

		/**
		 * @brief   Configure an inference target device type such as CPU, GPU or NPU. (only one type can be set)
		 * @details Internally, a given device type will be converted to new type.
		 *			This API is just used for backward compatibility.
		 *
		 * @since_tizen 6.0 (Deprecated)
		 */
		int ConfigureTargetTypes(int targetType, bool isNewVersion);

		/**
		 * @brief   Configure inference target devices such as CPU, GPU or NPU. (one or more types can be combined)
		 *
		 * @since_tizen 6.0
		 */
		int ConfigureTargetDevices(const int targetDevices);

		/**
		 * @brief   Check supported target devices
		 *
		 * @since_tizen 6.5
		 */
		bool IsTargetDeviceSupported(const int targetDevices);

		/**
		 * @brief   Configure the maximum number of inference results
		 *
		 * @since_tizen 5.5
		 */
		void ConfigureOutput(const int maxOutputNumbers);

		/**
		 * @brief   Configure the confidence threshold
		 *
		 * @since_tizen 5.5
		 */
		void ConfigureThreshold(const double threshold);

		/**
		 * @brief   Parses the metadata file path
		 *
		 * @since_tizen 6.5
		 */
		int ParseMetadata(const std::string filePath);

		/**
		 * @brief   Bind a backend engine
		 * @details Use this function to bind a backend engine for the inference.
		 *			This creates an inference engine common class object, and loads a backend
		 *			library which interfaces with a Neural Network runtime such as TF Lite,
		 *			OpenCV, ARMNN and so on.
		 *
		 *			Note: The created inference engine common object will be released and its
		 *				corresponding backend library will be unbound when the destructor
		 *				of the Inference class is called.
		 *
		 * @since_tizen 6.0
		 *
		 * @return @c 0 on success, otherwise a negative error value
		 * @retval #MEDIA_VISION_ERROR_NONE Successful
		 * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
		 */
		int Bind();

		/**
		 * @brief   Set default configuration for the inference
		 * @details Use this function to set default configuration given in json file by user.
		 *
		 *			Note: this function should be called after Bind().
		 *
		 * @since_tizen 6.0
		 *
		 * @return @c 0 on success, otherwise a negative error value
		 * @retval #MEDIA_VISION_ERROR_NONE Successful
		 * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
		 */
		int Prepare();

		/**
		 * @brief   Load model files
		 * @details Use this function to load given model files for the inference.
		 *
		 *			Note: this function should be called after Prepare().
		 *
		 * @since_tizen 6.0
		 *
		 * @return @c 0 on success, otherwise a negative error value
		 * @retval #MEDIA_VISION_ERROR_NONE Successful
		 * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
		 */
		int Load();

		/**
		 * @brief	Runs inference with a region of a given image
		 * @details Use this function to run forward pass with the given image.
		 *          The given image is preprocessed and the region of the image is
		 *          thrown to neural network. Then, the output tensor is returned.
		 *          If roi is NULL, then full source will be analyzed.
		 *
		 * @since_tizen 5.5
		 * @return @c true on success, otherwise a negative error value
		 */
		int Run(std::vector<mv_source_h> &mvSources,
				std::vector<mv_rectangle_s> &rects);

		/**
		 * @brief	Gets whether the given backend is supported or not
		 *
		 * @since_tizen 5.5
		 * @return pair of the backend library name and its support flag
		 */
		std::pair<std::string, bool> GetSupportedInferenceBackend(int backend);

		/**
		 * @brief	Gets the ImageClassificationResults
		 *
		 * @since_tizen 5.5
		 * @return @c true on success, otherwise a negative error value
		 */
		int GetClassficationResults(ImageClassificationResults *results);

		/**
		 * @brief	Gets the ObjectDetectionResults
		 *
		 * @since_tizen 5.5
		 * @return @c true on success, otherwise a negative error value
		 */
		int GetObjectDetectionResults(ObjectDetectionResults *results);

		/**
		 * @brief	Gets the FaceDetectionResults
		 *
		 * @since_tizen 5.5
		 * @return @c true on success, otherwise a negative error value
		 */
		int GetFaceDetectionResults(FaceDetectionResults *results);

		/**
		 * @brief	Gets the FacialLandmarkDetectionResults
		 *
		 * @since_tizen 5.5
		 * @return @c true on success, otherwise a negative error value
		 */
		int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results);

		/**
		 * @brief	Gets the PoseLandmarkDetectionResults
		 *
		 * @since_tizen 6.0
		 * @return @c true on success, otherwise a negative error value
		 */
		int GetPoseLandmarkDetectionResults(std::unique_ptr<mv_inference_pose_s> &detectionResults,
										int width, int height);

		/** @brief Returns the engine configuration handle set via SetEngineConfig(). */
		mv_engine_config_h GetEngineConfig(void)
		{
			return engine_config;
		}

		/** @brief Stores the engine configuration handle (not owned by this class). */
		void SetEngineConfig(mv_engine_config_h config)
		{
			engine_config = config;
		}

		/** @brief Returns the configured target device type bit set. */
		int GetTargetType()
		{
			return mConfig.mTargetTypes;
		}

	private:
		bool mCanRun; /**< The flag indicating ready to run Inference */
		InferenceConfig mConfig; /**< Aggregated session configuration */
		inference_engine_capacity mBackendCapacity; /**< Capabilities reported by the bound backend */
		std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend; /**< backend id -> (library name, supported) */
		cv::Size mInputSize; /**< Input tensor width/height */
		int mCh; /**< Input tensor channel count */
		double mThreshold; /**< Confidence threshold for result filtering */
		cv::Size mSourceSize; /**< Size of the original source image */
		mv_engine_config_h engine_config; /**< Externally owned engine config handle */
		InferenceEngineCommon *mBackend; /**< Bound backend engine; created in Bind(), released in the destructor */
		std::map<std::string, int> mModelFormats; /**< file extension -> model format id */
		std::vector<std::string> mUserListName; /**< Labels loaded from the user file */
		//std::map<std::string, inference_engine_tensor_buffer> mInputTensorBuffers;
		TensorBuffer mInputTensorBuffers; /**< Buffers fed to the input layers */
		inference_engine_layer_property mInputLayerProperty;
		//std::map<std::string, inference_engine_tensor_buffer> mOutputTensorBuffers;
		TensorBuffer mOutputTensorBuffers; /**< Buffers filled by the output layers */
		inference_engine_layer_property mOutputLayerProperty;

		Metadata mMetadata; /**< Parsed model metadata (see ParseMetadata()) */
		PreProcess mPreProc; /**< Input pre-processing helper */

	private:
		void CheckSupportedInferenceBackend();
		int ConvertEngineErrorToVisionError(int error);
		int ConvertTargetTypes(int given_types);
		int ConvertToCv(int given_type);
		inference_tensor_data_type_e ConvertToIE(int given_type);
		int Preprocess(cv::Mat cvImg, cv::Mat cvDst, int data_type);
		// NOTE(review): "Tenosr" is a typo for "Tensor" kept for ABI/source
		// compatibility; renaming requires updating the .cpp as well.
		int PrepareTenosrBuffers(void);
		void CleanupTensorBuffers(void);
		int SetUserFile(std::string filename);
		int FillOutputResult(tensor_t &outputData);

	};
382
383 } /* Inference */
384 } /* MediaVision */
385
386 #endif /* __MEDIA_VISION_INFERENCE_H__ */