#include "inference_engine_common_impl.h"
#include "inference_engine_common.h"
#include "inference_engine_type.h"
-#include <opencv2/core.hpp>
-#include <opencv2/imgproc.hpp>
-
using namespace InferenceEngineInterface::Common;
-typedef struct _ImageClassficationResults {
- int number_of_classes;
- std::vector<int> indices;
- std::vector<std::string> names;
- std::vector<float> confidences;
-} ImageClassificationResults; /**< structure ImageClassificationResults */
-
-typedef struct _ObjectDetectionResults {
- int number_of_objects;
- std::vector<int> indices;
- std::vector<std::string> names;
- std::vector<float> confidences;
- std::vector<cv::Rect> locations;
-} ObjectDetectionResults; /**< structure ObjectDetectionResults */
-
-typedef struct _FaceDetectionResults {
- int number_of_faces;
- std::vector<float> confidences;
- std::vector<cv::Rect> locations;
-} FaceDetectionResults; /**< structure FaceDetectionResults */
-
-typedef struct _FacialLandMarkDetectionResults {
- int number_of_landmarks;
- std::vector<cv::Point> locations;
-} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
-
namespace InferenceEngineInterface {
namespace Vision {
*/
int SetInputTensorParamNode(std::string node);
- /**
- * @brief Set an input image's information. Deprecated.
- *
- * @since_tizen 5.5
- */
- int SetInputTensorParamInput(int width, int height, int dim, int ch);
-
- /**
- * @brief Set mean and deviation values. Deprecated.
- *
- * @since_tizen 5.5
- */
- int SetInputTensorParamNorm(double deviation, double mean);
-
/**
* @brief Set output nodes' names. Deprecated.
*
*/
int SetOutputTensorParamNodes(std::vector<std::string> nodes);
- /**
- * @brief Set threshold value. Deprecated.
- *
- * @since_tizen 5.5
- */
- int SetOutputTensorParamThresHold(double threshold);
-
- /**
- * @brief Set the number of outputs. Deprecated.
- *
- * @since_tizen 5.5
- */
- int SetOutputTensorParamNumbers(int number);
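+ /**
+ * @brief Set an input tensor buffer to a given backend engine.
+ *
+ * @since_tizen 6.0
+ */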
+ int SetInputDataBuffer(tensor_t data);
/**
* @brief Get an input layer property information from a given backend engine.
*/
int GetOutputTensorProperty(inference_engine_layer_property *property);
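+ /**
+ * @brief Get a raw pointer to the input tensor buffer of a given backend engine.
+ *
+ * @since_tizen 6.0
+ */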
+ void *GetInputDataPtr();
+
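+ /**
+ * @brief Get the attribute type of an input layer from a given backend engine.
+ *
+ * @since_tizen 6.0
+ */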
+ int GetInputLayerAttrType();
+
/**
* @brief Set an input layer property information to a given backend engine.
*
*
* @since_tizen 5.5
*/
- int Run(cv::Mat tensor);
+ int Run();
/**
* @brief Run an inference with user-given input and output buffers.
std::vector<inference_engine_tensor_buffer> &output_buffers);
/**
- * @brief Get inference results of image classification.
- *
- * @since_tizen 5.5
- */
- int GetInferenceResult(ImageClassificationResults& results);
-
- /**
- * @brief Get inference results of object detection.
+ * @brief Get inference results from a backend engine.
*
- * @since_tizen 5.5
- */
- int GetInferenceResult(ObjectDetectionResults& results);
-
- /**
- * @brief Get inference results of face detection.
- *
- * @since_tizen 5.5
- */
- int GetInferenceResult(FaceDetectionResults& results);
-
- /**
- * @brief Get inference results of facial landmark detection.
- *
- * @since_tizen 5.5
+ * @since_tizen 6.0
*/
- int GetInferenceResult(FacialLandMarkDetectionResults& results);
+ int GetInferenceResult(tensor_t& results);
- /**
- * @brief Set SetUserFile.
- * @details Image Classification and Object Detection needs a user file
- * including labels of classification and objects.
- * @since_tizen 5.5
- */
- int SetUserFile(std::string filename);
int SetLibraryPath(std::string path);
-protected:
- /**
- * @brief Set an input with a type of cv::Mat
- * @details Set an input and pre-process the input
- *
- * @since_tizen 5.5
- */
- int SetInput(cv::Mat cvImg);
-
- /**
- * @brief Set user list name.
- *
- * @since_tizen 5.5
- */
- void SetUserListName(std::string userlist);
private:
std::string mBackendLibName;
- std::vector<std::string> mUserListName;
-
- int mCh;
- int mDim;
- cv::Size mInputSize;
-
- double mDeviation;
- double mMean;
- double mThreshold;
- int mOutputNumbers;
- cv::Size mSourceSize;
-
- cv::Mat mInputBuffer;
- int mMatType;
InferenceEngineCommon *mCommonEngine; /**< InferenceEngineCommon is used to
do typical process */
#define LOG_TAG "INFERENCE_ENGINE_VISION"
}
-typedef enum {
- InputAttrNoType = 0,
- InputAttrFloat32 = 1,
- InputAttrInt32 = 2,
- InputAttrUInt8 = 3,
- InputAttrInt64 = 4,
- InputAttrString = 5,
- InputAttrBool = 6,
-} InputAttrType;
-
namespace InferenceEngineInterface {
namespace Vision {
InferenceEngineVision::InferenceEngineVision(inference_engine_config *config) :
- mCh(0),
- mDim(0),
- mInputSize(cv::Size()),
- mDeviation(0.0),
- mMean(0.0),
- mThreshold(0.0),
- mOutputNumbers(0),
- mSourceSize(cv::Size()),
- mInputBuffer(cv::Mat()),
- mMatType(0),
mCommonEngine(nullptr)
{
LOGE("ENTER");
}
InferenceEngineVision::InferenceEngineVision(std::string backend) :
- mCh(0),
- mDim(0),
- mInputSize(cv::Size()),
- mDeviation(0.0),
- mMean(0.0),
- mThreshold(0.0),
- mOutputNumbers(0),
- mSourceSize(cv::Size()),
- mInputBuffer(cv::Mat()),
- mMatType(0),
mCommonEngine(nullptr)
{
LOGE("ENTER");
LOGI("LEAVE");
}
-int InferenceEngineVision::SetUserFile(std::string filename)
-{
- std::ifstream fp(filename.c_str());
- if (!fp.is_open()) {
- return INFERENCE_ENGINE_ERROR_INVALID_PATH;
- }
-
- std::string userListName;
- while (!fp.eof()) {
- std::getline(fp, userListName);
- if (userListName.length())
- SetUserListName(userListName);
- }
-
- fp.close();
-
- return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-void InferenceEngineVision::SetUserListName(std::string userlist)
-{
- mUserListName.push_back(userlist);
-}
-
-int InferenceEngineVision::SetInputTensorParamInput(int width, int height, int dim, int ch)
-{
- mCh = ch;
- mDim = dim;
- mInputSize = cv::Size(width, height);
-
- LOGI("InputSize is %d x %d\n", mInputSize.width, mInputSize.height);
-
- return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-int InferenceEngineVision::SetInputTensorParamNorm(double deviation, double mean)
-{
- mDeviation = deviation;
- mMean = mean;
-
- LOGI("mean %.4f, deviation %.4f", mMean, mDeviation);
-
- return INFERENCE_ENGINE_ERROR_NONE;
-}
-
int InferenceEngineVision::SetInputTensorParamNode(std::string node)
{
LOGE("ENTER");
return ret;
}
+void *InferenceEngineVision::GetInputDataPtr()
+{
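+ // The pointer is provided by the backend engine and may be nullptr
+ // if the backend has not allocated an input buffer yet.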
+ return mCommonEngine->GetInputDataPtr();
+}
+
int InferenceEngineVision::GetOutputTensorProperty(inference_engine_layer_property *property)
{
LOGE("ENTER");
return ret;
}
+int InferenceEngineVision::SetInputDataBuffer(tensor_t data)
+{
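+ // Some backends (e.g. OpenCV) allocate the input buffer here, while
+ // others (e.g. TFLite) already allocated it during Load() and ignore this call.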
+ return mCommonEngine->SetInputDataBuffer(data);
+}
+
int InferenceEngineVision::SetInputTensorProperty(inference_engine_layer_property &property)
{
LOGE("ENTER");
return ret;
}
-int InferenceEngineVision::SetOutputTensorParamThresHold(double threshold)
-{
- mThreshold = threshold;
-
- LOGI("threshold %.4f", mThreshold);
-
- return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-int InferenceEngineVision::SetOutputTensorParamNumbers(int numbers)
-{
- mOutputNumbers = numbers;
-
- LOGI("outputNumber %d", mOutputNumbers);
-
- return INFERENCE_ENGINE_ERROR_NONE;
-}
-
int InferenceEngineVision::SetOutputTensorParamNodes(std::vector<std::string> nodes)
{
LOGI("ENTER");
{
LOGI("ENTER");
- std::string label_file;
- if (model_format == INFERENCE_MODEL_TFLITE || model_format == INFERENCE_MODEL_TORCH) {
- label_file = model_paths[1];
- } else {
- label_file = model_paths[2];
- }
-
- size_t userFileLength = label_file.length();
- if (userFileLength > 0 && access(label_file.c_str(), F_OK)) {
- LOGE("Label file path in [%s] ", label_file.c_str());
- return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
- }
-
- int ret = (userFileLength > 0) ? SetUserFile(label_file) : INFERENCE_ENGINE_ERROR_NONE;
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("Fail to load label file.");
- return ret;
- }
-
// Load model files.
- ret = mCommonEngine->Load(model_paths, model_format);
+ int ret = mCommonEngine->Load(model_paths, model_format);
if (ret != INFERENCE_ENGINE_ERROR_NONE) {
LOGE("Fail to load InferenceEngineVision");
return ret;
}
- //get type and allocate memory to mInputBuffer;
- InputAttrType attrType = static_cast<InputAttrType>(mCommonEngine->GetInputLayerAttrType());
- if (attrType == InputAttrUInt8) {
- LOGI("InputType is %d ch with UINT8", mCh);
- if (mCh == 1) {
- mMatType = CV_8UC1;
- } else if (mCh == 3) {
- mMatType = CV_8UC3;
- } else {
- LOGE("Not supported");
- return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT;
- }
- }
- else if (attrType == InputAttrFloat32) {
- LOGI("InputType is %d ch with FLOAT32", mCh);
- if (mCh == 1) {
- mMatType = CV_32FC1;
- } else if (mCh == 3) {
- mMatType = CV_32FC3;
- } else {
- LOGE("Not supported");
- return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT;
- }
- }
- else {
- LOGE("Not supported");
- ret = INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT;
- return ret;
- }
-
- tensor_t inputData;
- std::vector<int> info{1, mMatType, mInputSize.height, mInputSize.width};
- inputData.dimInfo.push_back(info);
-
- // some plug-in (opencv) doesn't allocate memory for input while loading models
- // But, others (tflite) allcate memory while loading.
- // Thus, the SetInputData() will be implemented in plug-in such as OpenCV, but
- // just leave empty in plug-in such as tflite.
- ret = mCommonEngine->SetInputDataBuffer(inputData);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("Fail to SetInputData");
- return ret;
- }
-
- void *dataPtr = mCommonEngine->GetInputDataPtr();
- if (dataPtr == nullptr) {
- LOGE("input data address is null");
- return INFERENCE_ENGINE_ERROR_INTERNAL;
- }
-
- mInputBuffer = cv::Mat(mInputSize.height, mInputSize.width, mMatType, dataPtr);
-
LOGI("LEAVE");
return ret;
}
+int InferenceEngineVision::GetInputLayerAttrType()
+{
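+ // Returns the backend's input attribute type (e.g. FLOAT32 or UINT8),
+ // which callers can use to decide how to fill the input buffer.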
+ return mCommonEngine->GetInputLayerAttrType();
+}
+
int InferenceEngineVision::GetBackendCapacity(inference_engine_capacity *capacity)
{
LOGI("ENTER");
return ret;
}
-int InferenceEngineVision::SetInput(cv::Mat cvImg)
-{
- mSourceSize = cvImg.size();
- int width = mInputSize.width;
- int height = mInputSize.height;
-
- //PreProcess();
- cv::Mat sample;
- if (cvImg.channels() == 3 && mCh == 1)
- cv::cvtColor(cvImg, sample, cv::COLOR_BGR2GRAY);
- else
- sample = cvImg;
-
- // size
- cv::Mat sampleResized;
- if (sample.size() != cv::Size(width, height))
- cv::resize(sample, sampleResized, cv::Size(width, height));
- else
- sampleResized = sample;
-
- // type
- cv::Mat sampleFloat;
- if (mCh == 3)
- sampleResized.convertTo(sampleFloat, CV_32FC3);
- else
- sampleResized.convertTo(sampleFloat, CV_32FC1);
-
- // normalize
- cv::Mat sampleNormalized;
- cv::Mat meanMat;
- if (mCh == 3)
- meanMat = cv::Mat(sampleFloat.size(), CV_32FC3, cv::Scalar((float)mMean, (float)mMean, (float)mMean));
- else
- meanMat = cv::Mat(sampleFloat.size(), CV_32FC1, cv::Scalar((float)mMean));
-
- cv::subtract(sampleFloat, meanMat, sampleNormalized);
-
- sampleNormalized /= (float)mDeviation;
-
- sampleNormalized.convertTo(mInputBuffer, mMatType);
-
- return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-int InferenceEngineVision::Run(cv::Mat tensor)
+int InferenceEngineVision::Run()
{
LOGI("ENTER");
- int ret = SetInput(tensor);
- if (ret != INFERENCE_ENGINE_ERROR_NONE)
- LOGE("Fail to SetInput InferenceEngineVision");
- ret = mCommonEngine->Run();
+ int ret = mCommonEngine->Run();
if (ret != INFERENCE_ENGINE_ERROR_NONE)
LOGE("Fail to run InferenceEngineVision");
return ret;
}
-int InferenceEngineVision::GetInferenceResult(ImageClassificationResults& results)
-{
- LOGI("ENTER");
-
- // Will contain top N results in ascending order.
- std::vector<std::pair<float, int>> top_results;
- std::priority_queue<std::pair<float, int>,
- std::vector<std::pair<float, int>>,
- std::greater<std::pair<float, int>>> top_result_pq;
- float value;
-
- tensor_t outputData;
- int ret = mCommonEngine->GetInferenceResult(outputData);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("fail to GetInferenceResults with ImageClassificationResults");
- return ret;
- }
-
- std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
- std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
-
- long count = inferDimInfo[0][1];
- LOGI("count: %ld", count);
-
- float *prediction = reinterpret_cast<float*>(inferResults[0]);
- for (int i = 0; i < count; ++i) {
- value = prediction[i];
- // Only add it if it beats the threshold and has a chance at being in
- // the top N.
- top_result_pq.push(std::pair<float, int>(value, i));
-
- // If at capacity, kick the smallest value out.
- if (top_result_pq.size() > mOutputNumbers) {
- top_result_pq.pop();
- }
- }
-
- // Copy to output vector and reverse into descending order.
- while (!top_result_pq.empty()) {
- top_results.push_back(top_result_pq.top());
- top_result_pq.pop();
- }
- std::reverse(top_results.begin(), top_results.end());
-
- int classIdx = -1;
- results.number_of_classes = 0;
- for (int idx = 0; idx < top_results.size(); ++idx) {
- if (top_results[idx].first < mThreshold)
- continue;
- LOGI("idx:%d", idx);
- LOGI("classIdx: %d", top_results[idx].second);
- LOGI("classProb: %f", top_results[idx].first);
-
- classIdx = top_results[idx].second;
- results.indices.push_back(classIdx);
- results.confidences.push_back(top_results[idx].first);
- results.names.push_back(mUserListName[classIdx]);
- results.number_of_classes++;
- }
-
- LOGI("LEAVE");
-
- return ret;
-}
-
-int InferenceEngineVision::GetInferenceResult(ObjectDetectionResults& results)
+int InferenceEngineVision::GetInferenceResult(tensor_t &results)
{
- tensor_t outputData;
- int ret = mCommonEngine->GetInferenceResult(outputData);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("fail to GetInferenceResults with ObjectDetectionResults");
- return ret;
- }
-
- std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
- std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
-
- float* boxes = reinterpret_cast<float*>(inferResults[0]);
- float* classes = reinterpret_cast<float*>(inferResults[1]);
- float* scores = reinterpret_cast<float*>(inferResults[2]);
- int number_of_detections = (int)(*reinterpret_cast<float*>(inferResults[3]));
-
- int left, top, right, bottom;
- cv::Rect loc;
-
- results.number_of_objects = 0;
- for (int idx = 0; idx < number_of_detections; ++idx) {
- if (scores[idx] < mThreshold)
- continue;
-
- left = (int)(boxes[idx*4 + 1] * mSourceSize.width);
- top = (int)(boxes[idx*4 + 0] * mSourceSize.height);
- right = (int)(boxes[idx*4 + 3] * mSourceSize.width);
- bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height);
-
- loc.x = left;
- loc.y = top;
- loc.width = right -left + 1;
- loc.height = bottom - top + 1;
-
- results.indices.push_back((int)classes[idx]);
- results.confidences.push_back(scores[idx]);
- results.names.push_back(mUserListName[(int)classes[idx]]);
- results.locations.push_back(loc);
- results.number_of_objects++;
-
- LOGI("objectClass: %d", (int)classes[idx]);
- LOGI("confidence:%f", scores[idx]);
- LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
- }
-
- return ret;
-}
-
-int InferenceEngineVision::GetInferenceResult(FaceDetectionResults& results)
-{
- tensor_t outputData;
- int ret = mCommonEngine->GetInferenceResult(outputData);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("fail to GetInferenceResults with FaceDetectionResults");
- return ret;
- }
-
- std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
- std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
-
- float* boxes = reinterpret_cast<float*>(inferResults[0]);
- float* classes = reinterpret_cast<float*>(inferResults[1]);
- float* scores = reinterpret_cast<float*>(inferResults[2]);
-
- int number_of_detections = (int)(*reinterpret_cast<float*>(inferResults[3]));
- int left, top, right, bottom;
- cv::Rect loc;
-
- results.number_of_faces = 0;
- for (int idx = 0; idx < number_of_detections; ++idx) {
- if (scores[idx] < mThreshold)
- continue;
-
- left = (int)(boxes[idx*4 + 1] * mSourceSize.width);
- top = (int)(boxes[idx*4 + 0] * mSourceSize.height);
- right = (int)(boxes[idx*4 + 3] * mSourceSize.width);
- bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height);
-
- loc.x = left;
- loc.y = top;
- loc.width = right -left + 1;
- loc.height = bottom - top + 1;
-
- results.confidences.push_back(scores[idx]);
- results.locations.push_back(loc);
- results.number_of_faces++;
-
- LOGI("confidence:%f", scores[idx]);
- LOGI("class: %f", classes[idx]);
- LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx*4 + 1], boxes[idx*4 + 0], boxes[idx*4 + 3], boxes[idx*4 + 2]);
- LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
- }
-
- return ret;
-}
-
-int InferenceEngineVision::GetInferenceResult(FacialLandMarkDetectionResults& results)
-{
-
- tensor_t outputData;
- int ret = mCommonEngine->GetInferenceResult(outputData);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("fail to GetInferenceResults with FacialLandMarkDetectionResults");
- return ret;
- }
-
- std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
- std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
-
- long number_of_detections = inferDimInfo[0][1];
- float* loc = reinterpret_cast<float*>(inferResults[0]);
-
- results.number_of_landmarks = 0;
-
- cv::Point point(0,0);
- results.number_of_landmarks = 0;
- LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
- for (int idx = 0; idx < number_of_detections; idx+=2) {
- point.x = (int)(loc[idx] * mSourceSize.width);
- point.y = (int)(loc[idx+1] * mSourceSize.height);
-
- results.locations.push_back(point);
- results.number_of_landmarks++;
-
- LOGI("x:%d, y:%d", point.x, point.y);
- }
-
- return ret;
+ return mCommonEngine->GetInferenceResult(results);
}
int InferenceEngineVision::SetLibraryPath(std::string path)