return mSupportedInferenceBackend[backend];
}
// --- Review notes on this patch hunk ---
// Converts the `results` out-parameter from a reference to a pointer and
// moves the `number_of_classes` reset below the metadata branch.
// NOTE(review): the function name "GetClassficationResults" is missing an
// 'i' ("Classification") while the log string at the bottom spells it
// correctly — presumably a long-standing typo kept for API compatibility;
// confirm before renaming.
- int Inference::GetClassficationResults(ImageClassificationResults &results)
+ int Inference::GetClassficationResults(ImageClassificationResults *results)
{
// Will contain top N results in ascending order.
std::vector<std::pair<float, int>> topScore;
// NOTE(review): "threadHold"/"ThresHold" are misspellings of "threshold";
// kept as-is since mConfidenceThresHold is declared elsewhere in the file.
auto threadHold = mConfig.mConfidenceThresHold;
- results.number_of_classes = 0;
-
if (mMetadata.GetOutputMeta().IsParsed()) {
OutputMetadata outputMetadata = mMetadata.GetOutputMeta();
std::vector<int> indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll();
// topScore is reversed here but never populated in the visible lines —
// the code filling it sits in hunks omitted from this diff fragment.
std::reverse(topScore.begin(), topScore.end());
}
// Reset the output count through the pointer, now that the parameter type
// changed; placed after the metadata branch by this patch.
+ results->number_of_classes = 0;
+
// Copy each surviving (score, class-index) pair into the caller's result
// struct; mUserListName maps a class index to a display label.
for (auto& score : topScore) {
LOGI("score: %.3f, threshold: %.3f", score.first, threadHold);
LOGI("idx:%d", score.second);
LOGI("classProb: %.3f", score.first);
- results.indices.push_back(score.second);
- results.confidences.push_back(score.first);
- results.names.push_back(mUserListName[score.second]);
- results.number_of_classes++;
+ results->indices.push_back(score.second);
+ results->confidences.push_back(score.first);
+ results->names.push_back(mUserListName[score.second]);
+ results->number_of_classes++;
}
- LOGE("Inference: GetClassificationResults: %d\n", results.number_of_classes);
+ LOGE("Inference: GetClassificationResults: %d\n", results->number_of_classes);
return MEDIA_VISION_ERROR_NONE;
}
// --- Review notes on this patch hunk ---
// Renames the out-parameter `detectionResults` -> `results` and fills it in
// place, dropping the local ObjectDetectionResults copy and the final
// `*detectionResults = results;` assignment.
int Inference::GetObjectDetectionResults(
- ObjectDetectionResults *detectionResults)
+ ObjectDetectionResults *results)
{
if (mMetadata.GetOutputMeta().IsParsed()) {
OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
int boxOffset = 0;
int numberOfObjects = 0;
+
if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
// NOTE(review): decoding proceeds when the box-tensor count is NOT 1;
// in similar code a size != 1 is usually the error path — confirm
// against the context lines omitted from this diff.
if (boxIndexes.size() != 1) {
objDecoder.init();
objDecoder.decode();
- ObjectDetectionResults results;
- results.number_of_objects = 0;
+ results->number_of_objects = 0;
// Decoder boxes are normalized and center-anchored (x,y is the box
// center); convert to top-left-anchored pixel rects in source-image
// coordinates.
for (auto& box : objDecoder.getObjectAll()) {
- results.indices.push_back(box.index);
- results.names.push_back(mUserListName[box.index]);
- results.confidences.push_back(box.score);
- results.locations.push_back(cv::Rect(
+ results->indices.push_back(box.index);
+ results->names.push_back(mUserListName[box.index]);
+ results->confidences.push_back(box.score);
+ results->locations.push_back(cv::Rect(
static_cast<int>((box.location.x - box.location.width * 0.5f) * static_cast<float>(mSourceSize.width)),
static_cast<int>((box.location.y - box.location.height * 0.5f) * static_cast<float>(mSourceSize.height)),
static_cast<int>(box.location.width * static_cast<float>(mSourceSize.width)),
static_cast<int>(box.location.height * static_cast<float>(mSourceSize.height))));
- results.number_of_objects++;
+ results->number_of_objects++;
}
- *detectionResults = results;
- LOGI("Inference: GetObjectDetectionResults: %d\n",
- results.number_of_objects);
+
+ LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects);
} else {
// Legacy (non-metadata) tensor layout: boxes / classes / scores /
// detection-count outputs.
tensor_t outputData;
float *scores = nullptr;
int number_of_detections = 0;
- cv::Mat cvScores, cvClasses, cvBoxes;
// `boxes`, `classes`, `cvOutputData` and `cvLeft` are declared/filled
// in hunks omitted from this diff fragment.
if (outputData.dimInfo.size() == 1) {
// there is no way to know how many objects are detect unless the number of objects aren't
// provided. In the case, each backend should provide the number of results manually.
cv::Mat cvTop = cvOutputData.col(4).clone();
cv::Mat cvRight = cvOutputData.col(5).clone();
cv::Mat cvBottom = cvOutputData.col(6).clone();
-
// The patch narrows these cv::Mat declarations to this branch, since
// the pointer-based `else` branch does not use them.
+ cv::Mat cvScores, cvClasses, cvBoxes;
cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
+
cv::hconcat(cvBoxElems, 4, cvBoxes);
// classes
boxes = cvBoxes.ptr<float>(0);
classes = cvClasses.ptr<float>(0);
scores = cvScores.ptr<float>(0);
-
} else {
boxes = reinterpret_cast<float *>(outputData.data[0]);
classes = reinterpret_cast<float *>(outputData.data[1]);
scores = reinterpret_cast<float *>(outputData.data[2]);
- number_of_detections =
- (int) (*reinterpret_cast<float *>(outputData.data[3]));
+ number_of_detections = (int) (*reinterpret_cast<float *>(outputData.data[3]));
}
LOGI("number_of_detections = %d", number_of_detections);
- int left, top, right, bottom;
- cv::Rect loc;
+ results->number_of_objects = 0;
- ObjectDetectionResults results;
- results.number_of_objects = 0;
// Per the indices below, each box is [top(0), left(1), bottom(2),
// right(3)] in normalized coordinates; scale to source-image pixels.
for (int idx = 0; idx < number_of_detections; ++idx) {
if (scores[idx] < mThreshold)
continue;
- left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
- top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
- right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
- bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+ int left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
+ int top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
+ int right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
+ int bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+ cv::Rect loc;
loc.x = left;
loc.y = top;
loc.width = right - left + 1;
loc.height = bottom - top + 1;
- results.indices.push_back(static_cast<int>(classes[idx]));
- results.confidences.push_back(scores[idx]);
- results.names.push_back(
+ results->indices.push_back(static_cast<int>(classes[idx]));
+ results->confidences.push_back(scores[idx]);
+ results->names.push_back(
mUserListName[static_cast<int>(classes[idx])]);
- results.locations.push_back(loc);
- results.number_of_objects++;
+ results->locations.push_back(loc);
+ results->number_of_objects++;
LOGI("objectClass: %d", static_cast<int>(classes[idx]));
LOGI("confidence:%f", scores[idx]);
// NOTE(review): the stray "bottom);" below is the tail of a
// multi-line LOGI call whose opening lines fall in an omitted hunk.
bottom);
}
- *detectionResults = results;
- LOGI("Inference: GetObjectDetectionResults: %d\n",
- results.number_of_objects);
+ LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects);
}
return MEDIA_VISION_ERROR_NONE;
}
// --- Review notes on this patch hunk ---
// Same transformation as GetObjectDetectionResults above: the out-parameter
// is renamed `detectionResults` -> `results` and filled directly, removing
// the local FaceDetectionResults copy; the two-line signature is also
// collapsed onto one line.
- int
- Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
+ int Inference::GetFaceDetectionResults(FaceDetectionResults *results)
{
if (mMetadata.GetOutputMeta().IsParsed()) {
OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
int boxOffset = 0;
int numberOfFaces = 0;
+
if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
// NOTE(review): as in GetObjectDetectionResults, decoding runs when
// the box-tensor count is NOT 1 — verify against the omitted context.
if (boxIndexes.size() != 1) {
objDecoder.init();
objDecoder.decode();
- FaceDetectionResults results;
- results.number_of_faces = 0;
+ results->number_of_faces = 0;
// Convert normalized, center-anchored decoder boxes to top-left
// pixel rects in source-image coordinates.
for (auto& face : objDecoder.getObjectAll()) {
- results.confidences.push_back(face.score);
- results.locations.push_back(cv::Rect(
+ results->confidences.push_back(face.score);
+ results->locations.push_back(cv::Rect(
static_cast<int>((face.location.x - face.location.width * 0.5f) * static_cast<float>(mSourceSize.width)),
static_cast<int>((face.location.y - face.location.height * 0.5f) * static_cast<float>(mSourceSize.height)),
static_cast<int>(face.location.width * static_cast<float>(mSourceSize.width)),
static_cast<int>(face.location.height * static_cast<float>(mSourceSize.height))));
- results.number_of_faces++;
+ results->number_of_faces++;
}
- *detectionResults = results;
- LOGE("Inference: GetFaceDetectionResults: %d\n",
- results.number_of_faces);
+
+ LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces);
} else {
// Legacy (non-metadata) tensor layout: boxes / classes / scores /
// detection-count outputs. `boxes` and `cvOutputData` population for
// parts of this branch lives in hunks omitted from this diff.
tensor_t outputData;
float *classes = nullptr;
float *scores = nullptr;
int number_of_detections = 0;
-
cv::Mat cvScores, cvClasses, cvBoxes;
+
if (outputData.dimInfo.size() == 1) {
// there is no way to know how many objects are detect unless the number of objects aren't
// provided. In the case, each backend should provide the number of results manually.
// indicates the image id. But it is useless if a batch mode isn't supported.
// So, use the 1st of 7.
- number_of_detections = static_cast<int>(
- *reinterpret_cast<float *>(outputData.data[0]));
- cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3],
- CV_32F, outputData.data[0]);
+ number_of_detections = static_cast<int>(*reinterpret_cast<float *>(outputData.data[0]));
+ cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]);
// boxes
cv::Mat cvLeft = cvOutputData.col(3).clone();
cv::Mat cvTop = cvOutputData.col(4).clone();
cv::Mat cvRight = cvOutputData.col(5).clone();
cv::Mat cvBottom = cvOutputData.col(6).clone();
-
cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
cv::hconcat(cvBoxElems, 4, cvBoxes);
boxes = cvBoxes.ptr<float>(0);
classes = cvClasses.ptr<float>(0);
scores = cvScores.ptr<float>(0);
-
} else {
boxes = reinterpret_cast<float *>(outputData.data[0]);
classes = reinterpret_cast<float *>(outputData.data[1]);
scores = reinterpret_cast<float *>(outputData.data[2]);
- number_of_detections = static_cast<int>(
- *reinterpret_cast<float *>(outputData.data[3]));
+ number_of_detections = static_cast<int>(*reinterpret_cast<float *>(outputData.data[3]));
}
- int left, top, right, bottom;
- cv::Rect loc;
+ results->number_of_faces = 0;
- FaceDetectionResults results;
- results.number_of_faces = 0;
// Per the indices below, each box is [top(0), left(1), bottom(2),
// right(3)] in normalized coordinates; scale to source-image pixels.
for (int idx = 0; idx < number_of_detections; ++idx) {
if (scores[idx] < mThreshold)
continue;
- left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
- top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
- right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
- bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+ int left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
+ int top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
+ int right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
+ int bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);
+ cv::Rect loc;
loc.x = left;
loc.y = top;
loc.width = right - left + 1;
loc.height = bottom - top + 1;
-
- results.confidences.push_back(scores[idx]);
- results.locations.push_back(loc);
- results.number_of_faces++;
+ results->confidences.push_back(scores[idx]);
+ results->locations.push_back(loc);
+ results->number_of_faces++;
LOGI("confidence:%f", scores[idx]);
LOGI("class: %f", classes[idx]);
LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1],
boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]);
- LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right,
- bottom);
+ LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
}
- *detectionResults = results;
- LOGE("Inference: GetFaceDetectionResults: %d\n",
- results.number_of_faces);
+ LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces);
}
return MEDIA_VISION_ERROR_NONE;
}
poseDecoder.decode(inputW, inputH, thresRadius);
-
- int part = 0;
poseResult->number_of_poses = poseDecoder.getNumberOfPose();
+
for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) {
for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; ++ landmarkIndex) {
- part = landmarkIndex;
+ int part = landmarkIndex;
if (!mUserListName.empty()) {
part = std::stoi(mUserListName[landmarkIndex]) - 1;
if (part < 0) {
loc2f.x = (static_cast<float>(loc.x) / ratioX);
loc2f.y = (static_cast<float>(loc.y) / ratioY);
+
LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]",
landmarkIndex, part, loc2f.x, loc2f.y, score);