*/
using namespace InferenceEngineInterface::Common;
-typedef struct _ImageClassficationResults
+/**
+ * @brief Result record for image classification.
+ *
+ * number_of_classes starts at 0 and the three vectors start empty.
+ * NOTE(review): indices, names and confidences presumably hold one entry per
+ * class at matching positions - confirm against the code that fills them.
+ */
+struct ImageClassificationResults
{
- int number_of_classes;
+ int number_of_classes = 0;
std::vector<int> indices;
std::vector<std::string> names;
std::vector<float> confidences;
-} ImageClassificationResults; /**< structure ImageClassificationResults */
+};
-typedef struct _ObjectDetectionResults
+/**
+ * @brief Result record for object detection.
+ *
+ * number_of_objects starts at 0 and all vectors start empty.
+ * NOTE(review): indices, names, confidences and locations presumably hold one
+ * entry per detected object at matching positions - confirm against the code
+ * that fills them.
+ */
+struct ObjectDetectionResults
{
- int number_of_objects;
+ int number_of_objects = 0;
std::vector<int> indices;
std::vector<std::string> names;
std::vector<float> confidences;
std::vector<cv::Rect> locations;
-} ObjectDetectionResults; /**< structure ObjectDetectionResults */
+};
-typedef struct _FaceDetectionResults
+/**
+ * @brief Result record for face detection.
+ *
+ * number_of_faces starts at 0 and both vectors start empty.
+ * NOTE(review): confidences and locations presumably hold one entry per
+ * detected face at matching positions - confirm against the code that fills them.
+ */
+struct FaceDetectionResults
{
- int number_of_faces;
+ int number_of_faces = 0;
std::vector<float> confidences;
std::vector<cv::Rect> locations;
-} FaceDetectionResults; /**< structure FaceDetectionResults */
+};
-typedef struct _FacialLandMarkDetectionResults
+/**
+ * @brief Result record for facial landmark detection.
+ *
+ * number_of_landmarks starts at 0, like the counters of the sibling result
+ * structs, so a default-constructed record never exposes an indeterminate count.
+ * NOTE(review): locations presumably holds one point per landmark - confirm
+ * against the code that fills it.
+ */
+struct FacialLandMarkDetectionResults
{
- int number_of_landmarks;
+ int number_of_landmarks = 0;
std::vector<cv::Point> locations;
-} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
+};
-typedef struct _PoseLandmarkDetectionResults
+/**
+ * @brief Result record for pose landmark detection.
+ *
+ * number_of_landmarks starts at 0 and both vectors start empty.
+ * NOTE(review): locations and score presumably hold one entry per landmark at
+ * matching positions - confirm against the code that fills them.
+ */
+struct PoseLandmarkDetectionResults
{
- int number_of_landmarks;
+ int number_of_landmarks = 0;
std::vector<cv::Point2f> locations;
std::vector<float> score;
-} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */
+};
namespace mediavision
{
}
private:
- bool mCanRun; /**< The flag indicating ready to run Inference */
+ bool mCanRun = false; /**< The flag indicating ready to run Inference */
- InferenceConfig mConfig;
- inference_engine_capacity mBackendCapacity;
+ // Value-initialize explicitly: the constructor does not name these members in
+ // an initializer list, and plain default-initialization could leave scalar
+ // fields of these types indeterminate.
+ InferenceConfig mConfig {};
+ inference_engine_capacity mBackendCapacity {};
- std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend;
+
+ // Mediavision can support several inference engines via ML Single API
+ // "mlapi" means that the inference backend is used via ML Single API.
+ std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend = {
+ { MV_INFERENCE_BACKEND_OPENCV, { "opencv", false } }, { MV_INFERENCE_BACKEND_TFLITE, { "tflite", false } },
+ { MV_INFERENCE_BACKEND_ARMNN, { "armnn", false } }, { MV_INFERENCE_BACKEND_MLAPI, { "mlapi", false } },
+ { MV_INFERENCE_BACKEND_ONE, { "mlapi", false } }, { MV_INFERENCE_BACKEND_NNTRAINER, { "mlapi", false } },
+ { MV_INFERENCE_BACKEND_SNPE, { "mlapi", false } },
+ };
cv::Size mInputSize;
cv::Size mSourceSize;
- mv_engine_config_h engine_config;
- InferenceEngineCommon *mBackend;
- std::map<std::string, int> mModelFormats;
+ mv_engine_config_h engine_config = nullptr;
+ InferenceEngineCommon *mBackend = nullptr;
+
+ // Maps a model file extension to its INFERENCE_MODEL_* format identifier.
+ std::map<std::string, int> mModelFormats = {
+ { "caffemodel", INFERENCE_MODEL_CAFFE }, { "pb", INFERENCE_MODEL_TF },
+ { "tflite", INFERENCE_MODEL_TFLITE }, { "t7", INFERENCE_MODEL_TORCH },
+ { "weights", INFERENCE_MODEL_DARKNET }, { "bin", INFERENCE_MODEL_DLDT },
+ { "onnx", INFERENCE_MODEL_ONNX }, { "nb", INFERENCE_MODEL_VIVANTE },
+ { "ini", INFERENCE_MODEL_NNTRAINER }, { "dlc", INFERENCE_MODEL_SNPE },
+ };
std::vector<std::string> mUserListName;
- //std::map<std::string, inference_engine_tensor_buffer> mInputTensorBuffers;
TensorBuffer mInputTensorBuffers;
inference_engine_layer_property mInputLayerProperty;
- //std::map<std::string, inference_engine_tensor_buffer> mOutputTensorBuffers;
TensorBuffer mOutputTensorBuffers;
inference_engine_layer_property mOutputLayerProperty;
}
+/**
+ * @brief Default constructor.
+ *
+ * Calls CheckSupportedInferenceBackend() and then logs, for every entry of
+ * mSupportedInferenceBackend, the backend name together with its boolean flag.
+ * NOTE(review): CheckSupportedInferenceBackend() presumably updates those flags
+ * to reflect which backends are actually available - confirm in its definition.
+ */
Inference::Inference()
- : mCanRun()
- , mConfig()
- , mBackendCapacity()
- , mSupportedInferenceBackend()
- , mInputSize(cv::Size())
- , mSourceSize(cv::Size())
- , engine_config()
- , mBackend()
- , mMetadata()
- , mPreProc()
{
LOGI("ENTER");
- // Mediavision can support several inference engines via ML Single API
- // "mlapi" means that the inference backend is used via ML Single API.
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false)));
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false)));
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_ARMNN, std::make_pair("armnn", false)));
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false)));
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false)));
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_NNTRAINER, std::make_pair("mlapi", false)));
- mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_SNPE, std::make_pair("mlapi", false)));
-
CheckSupportedInferenceBackend();
+ // Log each backend name with the current value of its flag.
for (auto &backend : mSupportedInferenceBackend) {
LOGI("%s: %s", backend.second.first.c_str(), backend.second.second ? "TRUE" : "FALSE");
}
-
- mModelFormats.insert(std::make_pair<std::string, int>("caffemodel", INFERENCE_MODEL_CAFFE));
- mModelFormats.insert(std::make_pair<std::string, int>("pb", INFERENCE_MODEL_TF));
- mModelFormats.insert(std::make_pair<std::string, int>("tflite", INFERENCE_MODEL_TFLITE));
- mModelFormats.insert(std::make_pair<std::string, int>("t7", INFERENCE_MODEL_TORCH));
- mModelFormats.insert(std::make_pair<std::string, int>("weights", INFERENCE_MODEL_DARKNET));
- mModelFormats.insert(std::make_pair<std::string, int>("bin", INFERENCE_MODEL_DLDT));
- mModelFormats.insert(std::make_pair<std::string, int>("onnx", INFERENCE_MODEL_ONNX));
- mModelFormats.insert(std::make_pair<std::string, int>("nb", INFERENCE_MODEL_VIVANTE));
- mModelFormats.insert(std::make_pair<std::string, int>("ini", INFERENCE_MODEL_NNTRAINER));
- mModelFormats.insert(std::make_pair<std::string, int>("dlc", INFERENCE_MODEL_SNPE));
-
LOGI("LEAVE");
}
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- int boxOffset = 0;
- int numberOfObjects = 0;
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
+ if (boxIndexes.size() != 1) {
+ LOGE("Invalid dim size. It should be 1");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
- if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
- std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
- if (boxIndexes.size() != 1) {
- LOGE("Invalid dim size. It should be 1");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
- boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
- } else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
- std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
- if (boxIndexes.size() != 1) {
- LOGE("Invalid dim size. It should be 1");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
- boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+ int boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+ int numberOfObjects = 0;
+ if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
if (scoreIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
- } else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
- std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
- if (boxIndexes.size() != 1) {
- LOGE("Invalid dim size. It should be 1");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
- boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+ } else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR) {
numberOfObjects = boxOffset / outputMeta.GetBoxDecodeInfo().GetCellNumScales() - 5;
}
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- int boxOffset = 0;
- int numberOfFaces = 0;
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
+ if (boxIndexes.size() != 1) {
+ LOGE("Invalid dim size. It should be 1");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
- if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
- std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
- if (boxIndexes.size() != 1) {
- LOGE("Invalid dim size. It should be 1");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
- boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
- } else {
- std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
- if (boxIndexes.size() != 1) {
- LOGE("Invalid dim size. It should be 1");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
- boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+ int boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+ int numberOfFaces = 0;
+ if (outputMeta.GetBoxDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) {
std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
if (scoreIndexes.size() != 1) {
LOGE("Invaid dim size. It should be 1");