From a5b92ea0bc8bebd388caeb92cf1c10041c46b158 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 20 May 2021 09:57:02 +0900 Subject: [PATCH 01/16] Add PoseDecoder and Landmark to decode various type of pose output tensor Change-Id: I8be806ff3522aec1f7026912b8c317055e9e16db Signed-off-by: Tae-Young Chung --- .../mv_inference/inference/include/Landmark.h | 53 +++ .../mv_inference/inference/include/PoseDecoder.h | 95 ++++ .../mv_inference/inference/src/PoseDecoder.cpp | 483 +++++++++++++++++++++ 3 files changed, 631 insertions(+) create mode 100644 mv_machine_learning/mv_inference/inference/include/Landmark.h create mode 100644 mv_machine_learning/mv_inference/inference/include/PoseDecoder.h create mode 100644 mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h new file mode 100644 index 0000000..63ccf60 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_LANDMARK_H__ +#define __MEDIA_VISION_LANDMARK_H__ + +#include +#include +#include +#include + +/** + * @file Landmark.h + * @brief This file contains the Landmark class definition which + * provides landmark information. 
+ */ + +namespace mediavision +{ +namespace inference +{ + typedef struct _LandmarkPoint + { + float score; + cv::Point heatMapLoc; + cv::Point2f decodedLoc; + int id; + bool valid; + } LandmarkPoint; + + typedef struct _LandmarkResults + { + std::vector landmarks; + float score; + } LandmarkResults; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_LANDMARK_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h new file mode 100644 index 0000000..c910d62 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_POSEDECODER_H__ +#define __MEDIA_VISION_POSEDECODER_H__ + +#include +#include +#include +#include + + +#include "TensorBuffer.h" +#include "OutputMetadata.h" +#include "Landmark.h" + +/** + * @file PoseDecoder.h + * @brief This file contains the PoseDecoder class definition which + * provides pose decoder. 
+ */ + +namespace mediavision +{ +namespace inference +{ + class PoseDecoder + { + private: + TensorBuffer mTensorBuffer; + OutputMetadata mMeta; + int mHeatMapWidth; + int mHeatMapHeight; + int mHeatMapChannel; + int mNumberOfLandmarks; + + std::list mCandidates; + std::vector mPoseLandmarks; + + int getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH); + int getPosToIndex(LandmarkPoint& landmark); + int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal); + int findPose(LandmarkPoint& root, std::vector& decodedLandmarks, + float scaleW, float scaleH); + int traverseToNeighbor(int edgeId, int toId, int dir, + LandmarkPoint fromLandmark, LandmarkPoint& toLandmark, + float scaleW, float scaleH); + int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector); + + int convertXYZtoX(int x, int y, int c); + + cv::Point convertXYZtoXY(int x, int y, int c); + + public: + PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData, + int heatMapWidth, int heatMapHeight, int heatMapChannel, + int numberOfLandmarks) : + mTensorBuffer(buffer), + mHeatMapWidth(heatMapWidth), + mHeatMapHeight(heatMapHeight), + mHeatMapChannel(heatMapChannel), + mNumberOfLandmarks(numberOfLandmarks) { + mMeta = metaData; + }; + + ~PoseDecoder() = default; + + int init(); + + int decode(float scaleWidth, float scaleHeight, float thresHoldRadius); + + int getNumberOfPose(); + + float getPointX(int poseIdx, int partIdx); + float getPointY(int poseIdx, int partIdx); + float getScore(int poseIdx, int partIdx); + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_POSEDECODER_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp new file mode 100644 index 0000000..f30fbf9 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -0,0 +1,483 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights 
Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "PoseDecoder.h" +#include "PostProcess.h" + +#include +#include +#include + +#define MAX_NUMBER_OF_POSE 5 +#define MAX_NUMBER_OF_CORRECTION 3 + +namespace mediavision +{ +namespace inference +{ + int PoseDecoder::convertXYZtoX(int x, int y, int c) + { + return y * mHeatMapWidth * mHeatMapChannel + + x * mHeatMapChannel + + c; + } + + cv::Point PoseDecoder::convertXYZtoXY(int x, int y, int c) + { + int idxY = y * mHeatMapWidth * mHeatMapChannel * 2 + + x * mHeatMapChannel * 2 + + c; + + int idxX = idxY + mHeatMapChannel; + + return cv::Point(idxX, idxY); + } + + int PoseDecoder::init() + { + LOGI("ENTER"); + + Landmark& landmarkInfo = mMeta.GetLandmark(); + + if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) { + LOGE("Not supported landmark type"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + if (landmarkInfo.GetDecodingType() == 0) { + LOGI("Skip init"); + return MEDIA_VISION_ERROR_NONE; + } + + int x,y,c; + int sx, sy, ex, ey, dx, dy; + float score, localScore; + int idx; + bool isLocalMax; + ScoreInfo& scoreInfo = mMeta.GetScore(); + + mCandidates.clear(); + + if (landmarkInfo.GetType() == 0 || + landmarkInfo.GetType() == 2) { + mCandidates.resize(mHeatMapChannel); + } + + for (y = 0; y < mHeatMapHeight; ++y) { + for (x = 0; x < mHeatMapWidth; ++x) { + std::list::iterator candidate = mCandidates.begin(); + for (c = 0; c < 
mHeatMapChannel; ++c, candidate++) { + isLocalMax = true; + idx = convertXYZtoX(x, y, c); + score = mTensorBuffer.getValue(scoreInfo.GetName(), idx); + if (scoreInfo.GetType() == 1) { + score = PostProcess::sigmoid(score); + } + + if (score < scoreInfo.GetThresHold()) + continue; + + if (landmarkInfo.GetType() == 0 || + landmarkInfo.GetType() == 2) { + if (score <= candidate->score) + continue; + + candidate->score = score; + candidate->heatMapLoc.x = x; + candidate->heatMapLoc.y = y; + candidate->id = c; + + } else { //landmarkInfo.type == 1 + sx = std::max(x - 1, 0); + sy = std::max(y - 1, 0); + ex = std::min(x + 2, mHeatMapWidth); + ey = std::min(y + 2, mHeatMapHeight); + + for (dy = sy; dy < ey; ++dy) { + for (dx = sx; dx < ex; ++dx) { + idx = convertXYZtoX(dx, dy, c); + localScore = mTensorBuffer.getValue(scoreInfo.GetName(), idx); + if (scoreInfo.GetType() == 1) { + localScore = PostProcess::sigmoid(localScore); + } + if (localScore > score) { + isLocalMax = false; + break; + } + } + if (isLocalMax == false) + break; + } + + if (isLocalMax == false) + continue; + + // add this to list + LOGI("[%d x %d][%d]: score %.3f", y, x, c, score); + std::list::iterator iter; + for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) { + if ((*iter).score < score) { + break; + } + } + + LandmarkPoint localLandmark; + localLandmark.score = score; + localLandmark.heatMapLoc.x = x; + localLandmark.heatMapLoc.y = y; + localLandmark.id = c; + localLandmark.valid = false; + mCandidates.insert(iter, localLandmark); + } + } + } + } // end of init + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::getNumberOfPose() + { + return std::min(static_cast(mPoseLandmarks.size()), MAX_NUMBER_OF_POSE); + } + + int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal) + { + if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) { + offsetVal.x = offsetVal.y = 0.f; + LOGI("No offset value"); + LOGI("LEAVE"); + return 
MEDIA_VISION_ERROR_NONE; + } + + cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id); + + try { + offsetVal.x = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.x); + offsetVal.y = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.y); + } catch (const std::exception& e) { + LOGE("Fail to get value at (%d, %d) from %s", + idx.x, idx.y, mMeta.GetOffset().GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + return MEDIA_VISION_ERROR_NONE; + } + + float PoseDecoder::getPointX(int poseIdx, int partIdx) + { + LOGI("idx[%d]-part[%d]", poseIdx, partIdx); + return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x; + } + + float PoseDecoder::getPointY(int poseIdx, int partIdx) + { + LOGI("idx[%d]-part[%d]", poseIdx, partIdx); + return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y; + } + + float PoseDecoder::getScore(int poseIdx, int partIdx) + { + return mPoseLandmarks[poseIdx].landmarks[partIdx].score; + } + + int PoseDecoder::getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH) + { + if (scaleW <= 0.0f || scaleH <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + cv::Point2f offsetVal; + getOffsetValue(point, offsetVal); + + point.decodedLoc.x = static_cast(point.heatMapLoc.x) / static_cast(mHeatMapWidth - 1); + point.decodedLoc.y = static_cast(point.heatMapLoc.y) / static_cast(mHeatMapHeight - 1); + + point.decodedLoc.x += offsetVal.x / scaleW; + point.decodedLoc.y += offsetVal.y / scaleH; + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::getPosToIndex(LandmarkPoint& point) + { + cv::Point posVal; + + posVal.x = roundf(point.decodedLoc.x * static_cast(mHeatMapWidth - 1)); + posVal.y = roundf(point.decodedLoc.y * static_cast(mHeatMapHeight - 1)); + + posVal.x = std::max(std::min(posVal.x, mHeatMapWidth - 1), 0); + posVal.y = std::max(std::min(posVal.y, 
mHeatMapHeight - 1), 0); + + point.heatMapLoc = posVal; + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::decode(float scaleWidth, float scaleHeight, float thresHoldRadius) + { + LOGI("ENTER"); + + if (scaleWidth <= 0.0f || scaleHeight <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleWidth, scaleHeight); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + mPoseLandmarks.clear(); + LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false}; + + Landmark& landmarkInfo = mMeta.GetLandmark(); + ScoreInfo& scoreInfo = mMeta.GetScore(); + + if (landmarkInfo.GetType() == 0 || + landmarkInfo.GetType() == 2) { // single pose + mPoseLandmarks.resize(1); + + if (landmarkInfo.GetDecodingType() == 0) { // direct decoding + mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); + } else { // heatmap decoding + mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); + } + } + + if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding + while (!mCandidates.empty()) { + + LandmarkPoint &root = mCandidates.front(); + + getIndexToPos(root, scaleWidth, scaleHeight); + + if (landmarkInfo.GetType() == 0) { + root.valid = true; + mPoseLandmarks[0].landmarks[root.id] = root; + mPoseLandmarks[0].score += root.score; + mCandidates.pop_front(); + continue; + } + + LOGI("root id: %d", root.id); + + if (thresHoldRadius > 0.0f) { + bool isSkip = false; + for (auto& result : mPoseLandmarks) { + cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc; + dfRadius -= root.decodedLoc; + float radius = + std::pow(dfRadius.x * scaleWidth, 2.0f) + + std::pow(dfRadius.y * scaleHeight, 2.0f); + LOGI("id[%d], radius: %.f vs. %.f", root.id, radius, std::pow(thresHoldRadius, 2.0f)); + if (radius <= std::pow(thresHoldRadius, 2.0f)) { + LOGI("Not local maximum, Skip this"); + isSkip = true; + break; + } + } + if (isSkip) { + mCandidates.pop_front(); + continue; + } + } + + LOGI("Local maximum. 
Add this"); + + std::vector decodedLandmarks(mHeatMapChannel, initValue); + + findPose(root, decodedLandmarks, scaleWidth, scaleHeight); + + float poseScore = 0.0f; + for (auto& landmark : decodedLandmarks) { + poseScore += landmark.score; + LOGI("%.3f, %.3f", landmark.decodedLoc.x, landmark.decodedLoc.y); + } + + mPoseLandmarks.push_back(LandmarkResults {decodedLandmarks, poseScore}); + if (mPoseLandmarks.size() > MAX_NUMBER_OF_POSE) + break; + mCandidates.pop_front(); + } + + for (auto& pose : mPoseLandmarks) { + pose.score /= static_cast(mHeatMapChannel); + } + } else { + // multi pose is not supported + std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + float poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); + if (scoreInfo.GetType() == 1) { + poseScore = PostProcess::sigmoid(poseScore); + } + if (poseScore < scoreInfo.GetThresHold()) { + LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold()); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 
2 : 3; + if (landmarkInfo.GetDecodingType() == 0) { + landmarkOffset = landmarkInfo.GetOffset(); + } + for (int idx = 0; idx < mNumberOfLandmarks; ++idx) { + float px = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset); + float py = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset + 1); + + mPoseLandmarks[0].landmarks[idx].score = poseScore; + mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1); + mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight); + mPoseLandmarks[0].landmarks[idx].id = idx; + mPoseLandmarks[0].landmarks[idx].valid = true; + + LOGI("idx[%d]: %.4f, %.4f", idx, px, py); + } + + mPoseLandmarks[0].score = poseScore; + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::findPose(LandmarkPoint& root, std::vector& decodedLandmarks, + float scaleW, float scaleH) + { + LOGI("ENTER"); + + if (scaleW <= 0.0f || scaleH <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + decodedLandmarks[root.id] = root; + decodedLandmarks[root.id].valid = true; + LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y); + LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score); + + int index = static_cast(mMeta.GetEdge().GetEdgesAll().size()) - 1; + for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin(); + riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) { + int fromKeyId = riter->second; + int toKeyId = riter->first; + + if (decodedLandmarks[fromKeyId].valid == true && + decodedLandmarks[toKeyId].valid == false) { + LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId); + traverseToNeighbor(index, toKeyId, 1, + decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId], + scaleW, scaleH); + LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId, + 
decodedLandmarks[toKeyId].decodedLoc.x, + decodedLandmarks[toKeyId].decodedLoc.y, + decodedLandmarks[toKeyId].score); + } + index--; + } + + index = 0; + for (auto iter = mMeta.GetEdge().GetEdgesAll().begin(); + iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) { + int fromKeyId = iter->first; + int toKeyId = iter->second; + + if (decodedLandmarks[fromKeyId].valid == true && + decodedLandmarks[toKeyId].valid == false) { + LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId); + traverseToNeighbor(index, toKeyId, 0, + decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId], + scaleW, scaleH); + } + index++; + } + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir, + LandmarkPoint fromLandmark, LandmarkPoint& toLandmark, + float scaleW, float scaleH) + { + if (scaleW <= 0.0f || scaleH <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + cv::Point2f edgeVector(0.f, 0.f); + cv::Point nearHeatMapLoc; + + LOGI("org: %.4f, %.4f", fromLandmark.decodedLoc.x, fromLandmark.decodedLoc.y); + + // update heatMapLoc from decodedLoc; + nearHeatMapLoc.x = roundf(fromLandmark.decodedLoc.x + * static_cast(mHeatMapWidth - 1)); + nearHeatMapLoc.y = roundf(fromLandmark.decodedLoc.y + * static_cast(mHeatMapHeight - 1)); + + nearHeatMapLoc.x = std::max(std::min(nearHeatMapLoc.x, mHeatMapWidth - 1), 0); + nearHeatMapLoc.y = std::max(std::min(nearHeatMapLoc.y, mHeatMapHeight - 1), 0); + + LOGI("src: %d, %d", nearHeatMapLoc.x, nearHeatMapLoc.y); + + getEdgeVector(nearHeatMapLoc, edgeId, dir, edgeVector); + + LOGI("vector: %.4f, %.4f with edgeId %d", edgeVector.x, edgeVector.y, edgeId); + toLandmark.decodedLoc.x = fromLandmark.decodedLoc.x + edgeVector.x / scaleW; + toLandmark.decodedLoc.y = fromLandmark.decodedLoc.y + edgeVector.y / scaleH; + toLandmark.id = toId; + LOGI("tgt: %.4f, %.4f", toLandmark.decodedLoc.x, 
toLandmark.decodedLoc.y); + + for (int iter = 0; iter < MAX_NUMBER_OF_CORRECTION; ++iter) { + getPosToIndex(toLandmark); + getIndexToPos(toLandmark, scaleW, scaleH); + } + + int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id); + toLandmark.score = mTensorBuffer.getValue(mMeta.GetScore().GetName(), idx); + if (mMeta.GetScore().GetType() == 1) { + toLandmark.score = PostProcess::sigmoid(toLandmark.score); + } + + toLandmark.valid = true; + LOGI("Final: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y); + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector) + { + LOGI("ENTER"); + + LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size()); + int idxY = index.y * mHeatMapWidth + * static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2; + idxY += index.x * static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId; + + int idxX = idxY + static_cast(mMeta.GetEdge().GetEdgesAll().size()); + + for(auto& dispVec : mMeta.GetDispVecAll()){ + if (dispVec.GetType() == dir) { // 0: forward + LOGI("%s", dispVec.GetName().c_str()); + vector.x = mTensorBuffer.getValue(dispVec.GetName(), idxX); + vector.y = mTensorBuffer.getValue(dispVec.GetName(), idxY); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } +} +} -- 2.7.4 From 444006a2fd5bf7e0ca38101e71023b847bb38d81 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 24 Jun 2021 14:23:56 +0900 Subject: [PATCH 02/16] Fix printf format Change-Id: I010e68687d67482a05ecd77e1bff4a6b72e27c21 Signed-off-by: Kwang Son --- mv_machine_learning/mv_inference/inference/src/Inference.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 0ce6306..0a20d19 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ 
b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1263,7 +1263,7 @@ namespace inference for (size_t idx = 0; idx < top_results.size(); ++idx) { if (top_results[idx].first < mThreshold) continue; - LOGI("idx:%lu", idx); + LOGI("idx:%zu", idx); LOGI("classIdx: %d", top_results[idx].second); LOGI("classProb: %f", top_results[idx].first); -- 2.7.4 From be22b20edb3472a06ae33f1b11ffe03248113e3e Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 24 Jun 2021 16:20:12 +0900 Subject: [PATCH 03/16] Clean unused package dependency Change-Id: I0308800190cf895893f1592ae5c102799f909fee Signed-off-by: Kwang Son --- packaging/capi-media-vision.spec | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index c180dfa..e2543f2 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,22 +1,20 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.8.2 +Version: 0.8.3 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause Source0: %{name}-%{version}.tar.gz BuildRequires: cmake BuildRequires: pkgconfig(capi-media-tool) -BuildRequires: pkgconfig(libtbm) BuildRequires: pkgconfig(dlog) BuildRequires: pkgconfig(capi-system-info) BuildRequires: pkgconfig(opencv) BuildRequires: pkgconfig(json-glib-1.0) -BuildRequires: dlogutil -BuildRequires: pkgconfig(glib-2.0) BuildRequires: pkgconfig(iniparser) BuildRequires: pkgconfig(inference-engine-interface-common) %if !0%{?ml_only:1} +BuildRequires: pkgconfig(glib-2.0) BuildRequires: pkgconfig(zbar) # Change to the pkgconfig(zint) after zint package refactor BuildRequires: zint -- 2.7.4 From d213865d9e2e8fd6bab3371cf455ba47eec9f779 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 20 May 2021 10:12:54 +0900 Subject: [PATCH 04/16] Add pose landmark detection inference with outputmetadata Support models which require heatmap decoding with additional 
refinement as well as models which just provide landmark results. Change-Id: Ic993510d2655d488ea8a43e08a56c13d2f9bc94f Signed-off-by: Tae-Young Chung --- include/mv_inference_private.h | 10 +- .../mv_inference/inference/include/Inference.h | 2 + .../mv_inference/inference/include/PoseDecoder.h | 3 +- .../mv_inference/inference/src/Inference.cpp | 198 ++++++++++++++++----- .../mv_inference/inference/src/PoseDecoder.cpp | 1 - 5 files changed, 163 insertions(+), 51 deletions(-) diff --git a/include/mv_inference_private.h b/include/mv_inference_private.h index 95dff31..0d8a35e 100644 --- a/include/mv_inference_private.h +++ b/include/mv_inference_private.h @@ -34,11 +34,19 @@ extern "C" { * @since_tizen 6.0 * */ -typedef struct { +typedef struct mv_inference_landmark_s{ bool isAvailable; /**< Availability of landmark */ mv_point_s point; /**< 2D position of landmark */ int label; /**< Label of landmark */ float score; /**< Score of landmark */ + + mv_inference_landmark_s() { + isAvailable = false; + point.x = -1; + point.y = -1; + label = -1; + score = -1.0f; + } } mv_inference_landmark_s; /** diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index 6c88b95..3fea65d 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -32,6 +32,8 @@ #include "PostProcess.h" #include "TensorBuffer.h" +#include "Landmark.h" + #define HUMAN_POSE_MAX_LANDMARKS 16 #define HUMAN_POSE_MAX_PARTS 6 diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h index c910d62..545c385 100644 --- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h +++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h @@ -22,11 +22,12 @@ #include #include - #include "TensorBuffer.h" #include "OutputMetadata.h" #include 
"Landmark.h" +#define MAX_NUMBER_OF_POSE 5 + /** * @file PoseDecoder.h * @brief This file contains the PoseDecoder class definition which diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 0a20d19..94582b3 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -18,7 +18,9 @@ #include "Inference.h" #include "InferenceIni.h" #include "ObjectDecoder.h" +#include "PoseDecoder.h" #include +#include #include #include @@ -471,6 +473,16 @@ namespace inference if (!outputMeta.GetNumber().GetName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName()); + + if (!outputMeta.GetLandmark().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName()); + + if (!outputMeta.GetOffset().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName()); + + for (auto& dispVec : outputMeta.GetDispVecAll()) { + mConfig.mOutputLayerNames.push_back(dispVec.GetName()); + } } inference_engine_layer_property property; @@ -480,6 +492,7 @@ namespace inference INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1}; for (auto& name : mConfig.mOutputLayerNames) { + LOGI("Configure %s layer as output", name.c_str()); property.layers.insert(std::make_pair(name, tensor_info)); } @@ -1665,66 +1678,155 @@ namespace inference int Inference::GetPoseLandmarkDetectionResults( mv_inference_pose_result_h *detectionResults, int width, int height) { - tensor_t outputData; + LOGI("ENTER"); + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + auto& landmarkInfo = outputMeta.GetLandmark(); + auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || + !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + LOGE("output buffers named of %s or %s are NULL", + landmarkInfo.GetName().c_str(), 
scoreInfo.GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } + int heatMapWidth = 0; + int heatMapHeight = 0; + int heatMapChannel = 0; + if (landmarkInfo.GetDecodingType() != 0) { + heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; + heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; + heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); - long number_of_poses = 1; - long number_of_landmarks = inferDimInfo[0][3]; - float *tmp = static_cast(inferResults[0]); - cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + if (mPoseResult == NULL) { + mPoseResult = new(std::nothrow) mv_inference_pose_s; + if (mPoseResult == NULL) { + LOGE("Fail to create result handle"); + return MEDIA_VISION_ERROR_INTERNAL; + } + // 2d+single or 2d+multi or 3d+single or 3d+multi + int defaultNumberOfPose = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 2) ? 1 : MAX_NUMBER_OF_POSE; + std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + + // In case of DecodingType == 0, + // the landmarkChannel is guessed from the shape of the landmark output tensor. + // Otherwise, decoding heatmap, it is guessed from the heatMapChannel. 
+ int landmarkChannel = 0; + if (landmarkInfo.GetDecodingType() == 0) { + landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] + / landmarkInfo.GetOffset(); + } else { + landmarkChannel = heatMapChannel; + } - cv::Point loc; - cv::Point2f loc2f; - double score; - cv::Mat blurredHeatMap; + mPoseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel : + static_cast(mUserListName.size()); - cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), - CV_32FC(inferDimInfo[0][3]), (void *) tmp); + LOGE("number of landmarks per pose: %d", mPoseResult->number_of_landmarks_per_pose ); + mPoseResult->landmarks = new mv_inference_landmark_s* [defaultNumberOfPose]; + for (int idx = 0; idx < defaultNumberOfPose; ++idx) { + mPoseResult->landmarks[idx] = new mv_inference_landmark_s [mPoseResult->number_of_landmarks_per_pose]; + } + } + + // decoding + PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta, + heatMapWidth, heatMapHeight, heatMapChannel, + mPoseResult->number_of_landmarks_per_pose); + + // initialize decorder queue with landmarks to be decoded. + int ret = poseDecoder.init(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init poseDecoder"); + return ret; + } + + float inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); + float inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + float thresRadius = landmarkInfo.GetType() == 0 ? 
0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + poseDecoder.decode(inputW, inputH, thresRadius); + + int part = 0; + mPoseResult->number_of_poses = poseDecoder.getNumberOfPose(); + for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) { + for (int landmarkIndex = 0; landmarkIndex < mPoseResult->number_of_landmarks_per_pose; ++ landmarkIndex) { + part = landmarkIndex; + if (!mUserListName.empty()) { + part = std::stoi(mUserListName[landmarkIndex]) - 1; + if (part < 0) { + continue; + } + } - cv::Mat multiChannels[inferDimInfo[0][3]]; - split(reShapeTest, multiChannels); + mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true; + mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = + poseDecoder.getPointX(poseIndex, part) * static_cast(mSourceSize.width); + mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = + poseDecoder.getPointY(poseIndex, part) * static_cast(mSourceSize.height); + mPoseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex; + mPoseResult->landmarks[poseIndex][landmarkIndex].score = + poseDecoder.getScore(poseIndex, part); + } + } + *detectionResults = static_cast(mPoseResult); - float ratioX = static_cast(inferDimInfo[0][2]); - float ratioY = static_cast(inferDimInfo[0][1]); + } else { + tensor_t outputData; - if (mPoseResult == NULL) { - if(!mUserListName.empty()) { - number_of_landmarks = mUserListName.size(); + // Get inference result and contain it to outputData. 
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; } - mPoseResult = new mv_inference_pose_s; + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + long number_of_poses = 1; + long number_of_landmarks = inferDimInfo[0][3]; + float *tmp = static_cast(inferResults[0]); + cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + + cv::Point loc; + cv::Point2f loc2f; + double score; + cv::Mat blurredHeatMap; + + cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), + CV_32FC(inferDimInfo[0][3]), (void *) tmp); + + cv::Mat multiChannels[inferDimInfo[0][3]]; + split(reShapeTest, multiChannels); + + float ratioX = static_cast(inferDimInfo[0][2]); + float ratioY = static_cast(inferDimInfo[0][1]); + if (mPoseResult == NULL) { - LOGE("Fail to create result handle"); - return MEDIA_VISION_ERROR_INTERNAL; - } + if(!mUserListName.empty()) { + number_of_landmarks = mUserListName.size(); + } + mPoseResult = new mv_inference_pose_s; + if (mPoseResult == NULL) { + LOGE("Fail to create result handle"); + return MEDIA_VISION_ERROR_INTERNAL; + } - mPoseResult->number_of_poses= number_of_poses; - mPoseResult->number_of_landmarks_per_pose = number_of_landmarks; - mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses]; - for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { - mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks]; - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; ++landmarkIndex) { - mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = false; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].score = -1.0f; + 
mPoseResult->number_of_poses= number_of_poses; + mPoseResult->number_of_landmarks_per_pose = number_of_landmarks; + mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses]; + for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { + mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks]; } } - } - int part = 0; - for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { + int part = 0; + for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { part = landmarkIndex; if (!mUserListName.empty()) { part = std::stoi(mUserListName[landmarkIndex]) - 1; @@ -1748,11 +1850,11 @@ namespace inference mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast(static_cast(height) * loc2f.y); mPoseResult->landmarks[poseIndex][landmarkIndex].score = score; mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; + } } + *detectionResults = static_cast(mPoseResult); } - *detectionResults = static_cast(mPoseResult); - return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index f30fbf9..271f068 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -22,7 +22,6 @@ #include #include -#define MAX_NUMBER_OF_POSE 5 #define MAX_NUMBER_OF_CORRECTION 3 namespace mediavision -- 2.7.4 From c50b5d925d4345d57683e6a382e3e269e292568a Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 20 May 2021 10:30:02 +0900 Subject: [PATCH 05/16] Update testsuite to run pose landmark models with meta files Change-Id: I62307b534309ecbbf0c4e97ff2029e62fbdaea44 Signed-off-by: Tae-Young Chung --- meta-template/pld_cpm_192x192.json | 51 
++++++++ .../pld_mobilenet_v1_posenet_multi_257x257.json | 142 +++++++++++++++++++++ .../inference/inference_test_suite.c | 111 +++++++++++++++- 3 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 meta-template/pld_cpm_192x192.json create mode 100644 meta-template/pld_mobilenet_v1_posenet_multi_257x257.json diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json new file mode 100644 index 0000000..6a81e50 --- /dev/null +++ b/meta-template/pld_cpm_192x192.json @@ -0,0 +1,51 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "image", + "shape_type" : 1, + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Convolutional_Pose_Machine/stage_5_out", + "index" : [-1, 1, 1, 1], + "top_number" : 1 , + "threshold" : 0.3, + "score_type" : 0 + } + ], + "landmark" : [ + { + "name" : "Convolutional_Pose_Machine/stage_5_out", + "index" : [-1, 1, 1, 1], + "landmark_type" : 0, + "decoding_type" : 1, + "decoding_info" : + { + "heatmap" : + { + "shape_type": 1 + } + } + } + ] + } +} diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json new file mode 100644 index 0000000..93084ce --- /dev/null +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -0,0 +1,142 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "sub_2", + "shape_type" : 1, + "shape_dims" : [ 1, 257, 257, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [255.0, 255.0, 255.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "MobilenetV1/heatmap_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "top_number" : 1, + "threshold" : 0.65, + 
"score_type" : 1 + } + ], + "landmark" : [ + { + "name" : "MobilenetV1/heatmap_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "landmark_type" : 1, + "decoding_type" : 2, + "decoding_info" : + { + "heatmap" : + { + "shape_type" : 1, + "nms_radius" : 50.0 + } + } + } + ], + "offset" : [ + { + "name" : "MobilenetV1/offset_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "shape_type" : 1, + "dim_type" : 2 + } + ], + "displacement" : [ + { + "name" : "MobilenetV1/displacement_fwd_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "shape_type" : 1, + "dim_type" : 2, + "type" : 0 + }, + { + "name" : "MobilenetV1/displacement_bwd_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "shape_type" : 1, + "dim_type" : 2, + "type" : 1 + } + ], + "edgemap" : [ + { + "parent": 0, + "child": 1 + }, + { + "parent": 1, + "child": 3 + }, + { + "parent": 0, + "child": 2 + }, + { + "parent": 2, + "child": 4 + }, + { + "parent": 0, + "child": 5 + }, + { + "parent": 5, + "child": 7 + }, + { + "parent": 7, + "child": 9 + }, + { + "parent": 5, + "child": 11 + }, + { + "parent": 11, + "child": 13 + }, + { + "parent": 13, + "child": 15 + }, + { + "parent": 0, + "child": 6 + }, + { + "parent": 6, + "child": 8 + }, + { + "parent": 8, + "child": 10 + }, + { + "parent": 6, + "child": 12 + }, + { + "parent": 12, + "child": 14 + }, + { + "parent": 14, + "child": 16 + } + ] + } +} diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 877e022..579e9f1 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -171,6 +171,17 @@ #define PLD_MOTION_CAPTURE_MAPPING_FILE_PATH \ "/usr/share/capi-media-vision/models/PLD/mocap/example-mocap-mapping.txt" +/* + * Hosted models + */ +#define PLD_TFLITE_WEIGHT_CPM_192_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_cpm_192x192.tflite" + +#define PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH 
\ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_mobilenet_v1_posenet_multi_257x257.tflite" +#define PLD_LABEL_MOBILENET_V1_POSENET_257_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_mobilenet_v1_posenet_multi_label.txt" + /****** * Public model: * IC: mobilenet caffe, tf? @@ -3180,15 +3191,87 @@ int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg) return err; } +int perform_hosted_tflite_cpm_192_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PLD_TFLITE_WEIGHT_CPM_192_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_mobilenet_v1_posenet_257_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + +/* + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + 
PLD_LABEL_MOBILENET_V1_POSENET_257_PATH); +*/ + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} int perform_pose_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5 }; + const int options[] = { 1, 2, 3, 4, 5, 6, 7 }; const char *names[] = { "Configuration", "TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", "Prepare", "Run", "Back" }; @@ -3213,7 +3296,7 @@ int perform_pose_landmark_detection() err = perform_configuration(&engine_cfg); } break; case 2: { - //perform SRID TweakCNN config + //perform TweakCNN config if (engine_cfg) { int err2 = mv_destroy_engine_config(engine_cfg); if (err2 != MEDIA_VISION_ERROR_NONE) @@ -3223,6 +3306,26 @@ int perform_pose_landmark_detection() err = perform_armnn_cpm_config(&engine_cfg); } break; case 3: { + //perform cpm config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_cpm_192_config(&engine_cfg); + } break; + case 4: { + //perform mobilenet-v1 posenet config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); + } break; + case 5: { // create - configure - prepare if (infer) { int err2 = mv_inference_destroy(infer); @@ -3253,7 +3356,7 @@ int perform_pose_landmark_detection() break; } } break; - case 4: { + case 6: { if (mvSource) { int err2 = mv_destroy_source(mvSource); if (err2 != MEDIA_VISION_ERROR_NONE) @@ -3300,7 +3403,7 @@ int 
perform_pose_landmark_detection() unsigned long timeDiff = gettotalmillisec(diffspec); printf("elapsed time : %lu(ms)\n", timeDiff); } break; - case 5: { + case 7: { //perform destroy if (engine_cfg) { err = mv_destroy_engine_config(engine_cfg); -- 2.7.4 From 2f2fcec8d6e82787e222dbede3416a4c8b2dd9f1 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Mon, 12 Jul 2021 17:05:42 +0900 Subject: [PATCH 06/16] [MVQA] Change mv_infer_test order Change-Id: I5a156f2192ec59fdf276413843af06852d47c2e4 Signed-off-by: Kwang Son --- script/mvqa/db.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/script/mvqa/db.py b/script/mvqa/db.py index 9190239..ba0b737 100644 --- a/script/mvqa/db.py +++ b/script/mvqa/db.py @@ -133,7 +133,12 @@ class Session: pass def verify(self, result, label): - pass + rv = result.decode('utf-8') + trunk = rv[rv.find('callback:'):] + time_target = 'time : ' + task_time = trunk[trunk.find( + time_target) + len(time_target): trunk.find('(ms)')] + return (int(task_time), trunk[:trunk.find('(ms)')]) def get_db_meta(self): for ele in self.nas_config: @@ -222,7 +227,7 @@ class FaceCascadeDetection(Session): class FaceDetectionTFlite(FaceCascadeDetection): def run(self, item): file_path = os.path.basename(item) - command = '3\n2\n1\n5\n1\n6\n' + \ + command = '3\n5\n1\n7\n1\n8\n' + \ os.path.join(self.work_dir, file_path) + '\n2\n2\n' subp = subprocess.run(['sdb', 'shell', 'mv_infer_test_suite'], stdout=subprocess.PIPE, input=command.encode('UTF-8')) @@ -234,7 +239,7 @@ class FaceDetectionTFlite(FaceCascadeDetection): time_target = 'time : ' task_time = trunk[trunk.find( time_target) + len(time_target): trunk.find('(ms)')] - return (task_time, trunk[:trunk.find('(ms)')]) + return (int(task_time), trunk[:trunk.find('(ms)')]) class ImageClassification(Session): @@ -255,14 +260,6 @@ class ImageClassification(Session): stdout=subprocess.PIPE, input=command.encode('UTF-8')) return subp.stdout - def verify(self, result, label): 
- rv = result.decode('utf-8') - trunk = rv[rv.find('callback:'):] - time_target = 'time : ' - task_time = trunk[trunk.find( - time_target) + len(time_target): trunk.find('(ms)')] - return (task_time, trunk[:trunk.find('(ms)')]) - def clean(self, item): file_path = os.path.basename(item) os.system('sdb shell rm ' + os.path.join(self.work_dir, file_path)) @@ -287,14 +284,6 @@ class ObjectDetection(Session): stdout=subprocess.PIPE, input=command.encode('UTF-8')) return subp.stdout - def verify(self, result, label): - rv = result.decode('utf-8') - trunk = rv[rv.find('callback:'):] - time_target = 'time : ' - task_time = trunk[trunk.find( - time_target) + len(time_target): trunk.find('(ms)')] - return (task_time, trunk[:trunk.find('(ms)')]) - def clean(self, item): file_path = os.path.basename(item) os.system('sdb shell rm ' + os.path.join(self.work_dir, file_path)) -- 2.7.4 From f9aae3fdfa3004e65f3af9d4b236f76989cc28b6 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 14 Jul 2021 11:52:12 +0900 Subject: [PATCH 07/16] Add coordinate to Landmark outputmetadata Coordinate outputmetadata of Landmark indicates that an model's landmark coordinate is based on ratio between 0 ~ 1 or pixel. According to it, internal processes are applied to get proper landmark results. 
Change-Id: I5e24b34cf6ca2c9e9404f082382fb7fc3d662e7f Signed-off-by: Tae-Young Chung --- meta-template/pld_cpm_192x192.json | 1 + meta-template/pld_mobilenet_v1_posenet_multi_257x257.json | 1 + .../mv_inference/inference/include/OutputMetadata.h | 2 ++ mv_machine_learning/mv_inference/inference/src/Inference.cpp | 9 +++++++-- .../mv_inference/inference/src/OutputMetadata.cpp | 8 ++++++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json index 6a81e50..e4f573b 100644 --- a/meta-template/pld_cpm_192x192.json +++ b/meta-template/pld_cpm_192x192.json @@ -37,6 +37,7 @@ "name" : "Convolutional_Pose_Machine/stage_5_out", "index" : [-1, 1, 1, 1], "landmark_type" : 0, + "landmark_coordinate" : 1, "decoding_type" : 1, "decoding_info" : { diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json index 93084ce..1d69c7a 100644 --- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -37,6 +37,7 @@ "name" : "MobilenetV1/heatmap_2/BiasAdd", "index" : [-1, 1, 1, 1], "landmark_type" : 1, + "landmark_coordinate" : 1, "decoding_type" : 2, "decoding_info" : { diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 45c6311..8a6973e 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -283,6 +283,7 @@ namespace inference DimInfo dimInfo; int type; /**< 0: 2d-single, 1: 2d-multi, 2: 3-single */ int offset; + int coordinate; /**< 0: ratio, 1: pixel */ int decodingType; /**< 0: decoding unnecessary, 1: decoding heatmap, 2: decoding heatmap with additional refine data */ @@ -295,6 +296,7 @@ namespace inference DimInfo GetDimInfo() { return dimInfo; } int 
GetType(); int GetOffset(); + int GetCoordinate(); int GetDecodingType(); DecodeInfo& GetDecodingInfo(); diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 94582b3..3a2d20f 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1744,9 +1744,14 @@ namespace inference return ret; } - float inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); - float inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + float inputW = 1.f; + float inputH = 1.f; float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + if (landmarkInfo.GetCoordinate() == 1) { + inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + } + poseDecoder.decode(inputW, inputH, thresRadius); int part = 0; diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 6c396f5..aab8b50 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -691,6 +691,9 @@ namespace inference offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); LOGI("landmark offset: %d", offset); + coordinate = static_cast(json_object_get_int_member(pObject, "landmark_coordinate")); + LOGI("landmark coordinate: %d", coordinate); + decodingType = static_cast(json_object_get_int_member(pObject, "decoding_type")); LOGI("landmark decodeing type: %d", decodingType); } @@ -709,6 +712,11 @@ namespace inference return offset; } + int Landmark::GetCoordinate() + { + return coordinate; + } + int Landmark::GetDecodingType() { return 
decodingType; -- 2.7.4 From a96f91674da4852ca81dbccf5a6d9460dd92e746 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 14 Jul 2021 16:01:34 +0900 Subject: [PATCH 08/16] Add facial landmark detection inference with outputmetadata Change-Id: I03deac554f22ec9fe079b38d9562fd667b854495 Signed-off-by: Tae-Young Chung --- .../mv_inference/inference/include/Inference.h | 3 +- .../mv_inference/inference/src/Inference.cpp | 112 ++++++++++++++++----- .../mv_inference/inference/src/PoseDecoder.cpp | 18 ++-- .../inference/src/mv_inference_open.cpp | 15 ++- 4 files changed, 112 insertions(+), 36 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index 3fea65d..030f9ec 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -310,7 +310,8 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results); + int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results, + int width, int height); /** * @brief Gets the PoseLandmarkDetectionResults diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 3a2d20f..c7ca663 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1635,41 +1635,101 @@ namespace inference } int Inference::GetFacialLandMarkDetectionResults( - FacialLandMarkDetectionResults *detectionResults) + FacialLandMarkDetectionResults *detectionResults, int width, int height) { - tensor_t outputData; + LOGI("ENTER"); + FacialLandMarkDetectionResults results; + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + auto& 
landmarkInfo = outputMeta.GetLandmark(); + auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || + !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + LOGE("output buffers named of %s or %s are NULL", + landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } + int heatMapWidth = 0; + int heatMapHeight = 0; + int heatMapChannel = 0; + if (landmarkInfo.GetDecodingType() != 0) { + heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; + heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; + heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + } + + int number_of_landmarks = 0; + std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + if (landmarkInfo.GetDecodingType() == 0) { + LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); + number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] + / landmarkInfo.GetOffset(); + } else { + number_of_landmarks = heatMapChannel; + } + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); + + // decoding + PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta, + heatMapWidth, heatMapHeight, heatMapChannel, + number_of_landmarks); + // initialize decorder queue with landmarks to be decoded. 
+ int ret = poseDecoder.init(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init poseDecoder"); + return ret; + } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + float inputW = 1.f; + float inputH = 1.f; + if (landmarkInfo.GetCoordinate() == 1) { + inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + } + float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + poseDecoder.decode(inputW, inputH, thresRadius); - long number_of_detections = inferDimInfo[0][1]; - float *loc = reinterpret_cast(inferResults[0]); + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { + results.locations.push_back( + cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast(mSourceSize.width), + poseDecoder.getPointY(0, landmarkIndex) * static_cast(mSourceSize.height))); + } + results.number_of_landmarks = results.locations.size(); + *detectionResults = results; + } else { + tensor_t outputData; - FacialLandMarkDetectionResults results; - results.number_of_landmarks = 0; + // Get inference result and contain it to outputData. 
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); - cv::Point point(0, 0); - results.number_of_landmarks = 0; - LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (int idx = 0; idx < number_of_detections; idx += 2) { - point.x = static_cast(loc[idx] * mSourceSize.width); - point.y = static_cast(loc[idx + 1] * mSourceSize.height); + long number_of_detections = inferDimInfo[0][1]; + float *loc = reinterpret_cast(inferResults[0]); - results.locations.push_back(point); - results.number_of_landmarks++; + results.number_of_landmarks = 0; - LOGI("x:%d, y:%d", point.x, point.y); - } + cv::Point point(0, 0); + LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); + for (int idx = 0; idx < number_of_detections; idx += 2) { + point.x = static_cast(loc[idx] * mSourceSize.width); + point.y = static_cast(loc[idx + 1] * mSourceSize.height); + + results.locations.push_back(point); + results.number_of_landmarks++; - *detectionResults = results; + LOGI("x:%d, y:%d", point.x, point.y); + } + + *detectionResults = results; + } LOGE("Inference: FacialLandmarkDetectionResults: %d\n", results.number_of_landmarks); return MEDIA_VISION_ERROR_NONE; diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 271f068..cce5143 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -318,14 +318,16 @@ namespace inference } else { // multi pose is not supported std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); - float poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); - if (scoreInfo.GetType() == 1) { - poseScore = 
PostProcess::sigmoid(poseScore); - } - if (poseScore < scoreInfo.GetThresHold()) { - LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold()); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; + float poseScore = scoreInfo.GetThresHold(); + if (!scoreIndexes.empty()) { + poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); + if (scoreInfo.GetType() == 1) { + poseScore = PostProcess::sigmoid(poseScore); + } + if (poseScore < scoreInfo.GetThresHold()) { + LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, scoreInfo.GetThresHold()); + return MEDIA_VISION_ERROR_NONE; + } } int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3; diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index 1c4eb7e..c6bb99a 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -771,6 +771,19 @@ int mv_inference_facial_landmark_detect_open( std::vector sources; std::vector rects; + unsigned int width, height; + ret = mv_source_get_width(source, &width); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get width"); + return ret; + } + + ret = mv_source_get_height(source, &height); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get height"); + return ret; + } + sources.push_back(source); if (roi != NULL) @@ -784,7 +797,7 @@ int mv_inference_facial_landmark_detect_open( FacialLandMarkDetectionResults facialLandMarkDetectionResults; ret = pInfer->GetFacialLandMarkDetectionResults( - &facialLandMarkDetectionResults); + &facialLandMarkDetectionResults, width, height); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to get inference results"); return ret; -- 2.7.4 From b4875a1fdb0fd25c9ef43fd498199a87a9feeb96 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 14 Jul 
2021 16:04:28 +0900 Subject: [PATCH 09/16] Update testsuite to run facial landmark models with meta files Change-Id: Id3b8c985a7dcdc55f3e034107f7e4e9f5cd3760b Signed-off-by: Tae-Young Chung --- meta-template/fld_mediapipe_192x192.json | 46 ++++++++++ meta-template/fld_tweakcnn_128x128.json | 46 ++++++++++ .../inference/inference_test_suite.c | 99 +++++++++++++++++++++- 3 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 meta-template/fld_mediapipe_192x192.json create mode 100644 meta-template/fld_tweakcnn_128x128.json diff --git a/meta-template/fld_mediapipe_192x192.json b/meta-template/fld_mediapipe_192x192.json new file mode 100644 index 0000000..562fb77 --- /dev/null +++ b/meta-template/fld_mediapipe_192x192.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input", + "shape_type" : 1, + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [127.5, 127.5, 127.5], + "std" : [127.5, 127.5, 127.5] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "conv2d_30", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.3, + "score_type" : 1 + } + ], + "landmark" : [ + { + "name" : "conv2d_20", + "index" : [-1, -1, -1, 1], + "landmark_type" : 0, + "landmark_coordinate" : 1, + "decoding_type" : 0, + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/fld_tweakcnn_128x128.json b/meta-template/fld_tweakcnn_128x128.json new file mode 100644 index 0000000..7d8b48f --- /dev/null +++ b/meta-template/fld_tweakcnn_128x128.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "Placeholder", + "shape_type" : 1, + "shape_dims" : [ 1, 128, 128, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ 
+ { + "name" : "fanet8ss_inference/fully_connected_1/Sigmoid", + "index" : [-1, -1, -1, -1], + "top_number" : 1, + "threshold" : 0.0, + "score_type" : 0 + } + ], + "landmark" : [ + { + "name" : "fanet8ss_inference/fully_connected_1/Sigmoid", + "index" : [-1, 1], + "landmark_type" : 0, + "landmark_coordinate" : 0, + "landmark_offset" : 2, + "decoding_type" : 0 + } + ] + } +} diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 579e9f1..b838945 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -161,6 +161,14 @@ #define FLD_OPENCV_CONFIG_CAFFE_PATH \ "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" +/* + * Hosted models + */ +#define FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH \ + "/usr/share/capi-media-vision/models/FLD/tflite/fld_tweakcnn_128x128.tflite" +#define FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH \ + "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.tflite" + //Pose Detection #define PLD_TFLITE_WEIGHT_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld-tflite-001.tflite" @@ -2917,15 +2925,80 @@ int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg) return err; } + +int perform_hosted_tflite_tweakCNN_128_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH); + + mv_engine_config_set_int_attribute(handle, 
MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_mediapipe_192_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + int perform_facial_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6 }; + const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; const char *names[] = { "Configuration", "Tflite(CPU) + TweakCNN", "OPENCV(CPU) + TweakCNN", + "Hosted: TFLite(TweakCNN)", + "Hosted: TFLite(MediaPipe)", "Prepare", "Run", "Back" }; @@ -2969,6 +3042,26 @@ int perform_facial_landmark_detection() err = perform_opencv_cnncascade(&engine_cfg); } break; case 4: { + //perform Hosted TweakCNN config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_tweakCNN_128_config(&engine_cfg); + } break; + case 5: { + //perform Hosted MediaPipe config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if 
(err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_mediapipe_192_config(&engine_cfg); + } break; + case 6: { // create - configure - prepare if (infer) { int err2 = mv_inference_destroy(infer); @@ -2999,7 +3092,7 @@ int perform_facial_landmark_detection() break; } } break; - case 5: { + case 7: { if (mvSource) { int err2 = mv_destroy_source(mvSource); if (err2 != MEDIA_VISION_ERROR_NONE) @@ -3046,7 +3139,7 @@ int perform_facial_landmark_detection() unsigned long timeDiff = gettotalmillisec(diffspec); printf("elapsed time : %lu(ms)\n", timeDiff); } break; - case 6: { + case 8: { //perform destroy if (engine_cfg) { err = mv_destroy_engine_config(engine_cfg); -- 2.7.4 From c946ec318ae4910a5cd8f62de8f4efac3a773484 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Fri, 16 Jul 2021 12:38:03 +0900 Subject: [PATCH 10/16] Remove unnecessary variables copying outputData, instead use outputData itself Change-Id: I03f67afaaa83042812de7323f9b90714ba37f80a Signed-off-by: Tae-Young Chung --- .../mv_inference/inference/src/Inference.cpp | 103 +++++++-------------- 1 file changed, 36 insertions(+), 67 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index c7ca663..5ff3235 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1243,13 +1243,9 @@ namespace inference top_result_pq; float value = 0.0f; - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - - int count = inferDimInfo[0][1]; + int count = outputData.dimInfo[0][1]; LOGI("count: %d", count); - float *prediction = reinterpret_cast(inferResults[0]); + float *prediction = reinterpret_cast(outputData.data[0]); for (int i = 0; i < count; ++i) { value = prediction[i]; @@ 
-1270,20 +1266,17 @@ namespace inference } std::reverse(top_results.begin(), top_results.end()); - int classIdx = -1; ImageClassificationResults results; results.number_of_classes = 0; - for (size_t idx = 0; idx < top_results.size(); ++idx) { - if (top_results[idx].first < mThreshold) + for (auto& result : top_results) { + if (result.first < mThreshold) continue; - LOGI("idx:%zu", idx); - LOGI("classIdx: %d", top_results[idx].second); - LOGI("classProb: %f", top_results[idx].first); - - classIdx = top_results[idx].second; - results.indices.push_back(classIdx); - results.confidences.push_back(top_results[idx].first); - results.names.push_back(mUserListName[classIdx]); + + LOGI("class Idx: %d, Prob: %.4f", result.second, result.first); + + results.indices.push_back(result.second); + results.confidences.push_back(result.first); + results.names.push_back(mUserListName[result.second]); results.number_of_classes++; } @@ -1372,12 +1365,6 @@ namespace inference // In case of object detection, // a model may apply post-process but others may not. // Thus, those cases should be hanlded separately. 
- std::vector > inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); float *boxes = nullptr; float *classes = nullptr; @@ -1395,7 +1382,7 @@ namespace inference number_of_detections = static_cast( *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], + cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]); // boxes @@ -1418,11 +1405,11 @@ namespace inference scores = cvScores.ptr(0); } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); + boxes = reinterpret_cast(outputData.data[0]); + classes = reinterpret_cast(outputData.data[1]); + scores = reinterpret_cast(outputData.data[2]); number_of_detections = - (int) (*reinterpret_cast(inferResults[3])); + (int) (*reinterpret_cast(outputData.data[3])); } LOGI("number_of_detections = %d", number_of_detections); @@ -1542,12 +1529,6 @@ namespace inference // In case of object detection, // a model may apply post-process but others may not. // Thus, those cases should be handled separately. 
- std::vector > inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); float *boxes = nullptr; float *classes = nullptr; @@ -1565,7 +1546,7 @@ namespace inference number_of_detections = static_cast( *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], + cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]); // boxes @@ -1588,11 +1569,11 @@ namespace inference scores = cvScores.ptr(0); } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); + boxes = reinterpret_cast(outputData.data[0]); + classes = reinterpret_cast(outputData.data[1]); + scores = reinterpret_cast(outputData.data[2]); number_of_detections = static_cast( - *reinterpret_cast(inferResults[3])); + *reinterpret_cast(outputData.data[3])); } int left, top, right, bottom; @@ -1707,30 +1688,22 @@ namespace inference return ret; } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + int number_of_detections = outputData.dimInfo[0][1] >> 1; + float *loc = reinterpret_cast(outputData.data[0]); + results.number_of_landmarks = number_of_detections; + results.locations.resize(number_of_detections); - long number_of_detections = inferDimInfo[0][1]; - float *loc = reinterpret_cast(inferResults[0]); - - results.number_of_landmarks = 0; - - cv::Point point(0, 0); LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (int idx = 0; idx < number_of_detections; idx += 2) { - point.x = static_cast(loc[idx] * mSourceSize.width); - point.y = static_cast(loc[idx + 1] * mSourceSize.height); - - results.locations.push_back(point); - results.number_of_landmarks++; + for (auto& point 
: results.locations) { + point.x = static_cast(*loc++ * mSourceSize.width); + point.y = static_cast(*loc++ * mSourceSize.height); LOGI("x:%d, y:%d", point.x, point.y); } *detectionResults = results; } - LOGE("Inference: FacialLandmarkDetectionResults: %d\n", + LOGI("Inference: FacialLandmarkDetectionResults: %d\n", results.number_of_landmarks); return MEDIA_VISION_ERROR_NONE; } @@ -1848,28 +1821,24 @@ namespace inference return ret; } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - long number_of_poses = 1; - long number_of_landmarks = inferDimInfo[0][3]; - float *tmp = static_cast(inferResults[0]); - cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + long number_of_landmarks = outputData.dimInfo[0][3]; + float *tmp = static_cast(outputData.data[0]); + cv::Size heatMapSize(outputData.dimInfo[0][1], outputData.dimInfo[0][2]); cv::Point loc; cv::Point2f loc2f; double score; cv::Mat blurredHeatMap; - cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), - CV_32FC(inferDimInfo[0][3]), (void *) tmp); + cv::Mat reShapeTest(cv::Size(outputData.dimInfo[0][2], outputData.dimInfo[0][1]), + CV_32FC(outputData.dimInfo[0][3]), (void *) tmp); - cv::Mat multiChannels[inferDimInfo[0][3]]; + cv::Mat multiChannels[outputData.dimInfo[0][3]]; split(reShapeTest, multiChannels); - float ratioX = static_cast(inferDimInfo[0][2]); - float ratioY = static_cast(inferDimInfo[0][1]); + float ratioX = static_cast(outputData.dimInfo[0][2]); + float ratioY = static_cast(outputData.dimInfo[0][1]); if (mPoseResult == NULL) { if(!mUserListName.empty()) { -- 2.7.4 From be33524781565d84e13f56c04fd777384d9c350a Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 15 Jul 2021 17:39:20 +0900 Subject: [PATCH 11/16] test: Add show_menu helper functions show_menu -> show_menu_linear: clean manual numbering show_menu_yes_or_no: clean name_last Change-Id: Ib2b1579cf16351a2d1299a6285b7f6e9a0aed540 
Signed-off-by: Kwang Son --- .../common/testsuite_common/mv_testsuite_common.c | 29 ++++ .../common/testsuite_common/mv_testsuite_common.h | 7 ++ .../inference/inference_test_suite.c | 108 +++++---------------- 3 files changed, 59 insertions(+), 85 deletions(-) diff --git a/test/testsuites/common/testsuite_common/mv_testsuite_common.c b/test/testsuites/common/testsuite_common/mv_testsuite_common.c index d368ef8..91f5bc1 100644 --- a/test/testsuites/common/testsuite_common/mv_testsuite_common.c +++ b/test/testsuites/common/testsuite_common/mv_testsuite_common.c @@ -272,3 +272,32 @@ int load_mv_source_from_file( return err; } + +int show_menu_linear(const char *title, const char **menu, size_t len_menu) +{ + printf("*********************************************\n"); + printf("* %38s *\n", title); + printf("*-------------------------------------------*\n"); + + for (size_t i = 0; i < len_menu; ++i) + printf("* %2zu. %34s *\n", i + 1, menu[i]); + + printf("*********************************************\n\n"); + int selection = 0; + printf("Your choice: "); + if (scanf("%20i", &selection) == 0) { + if (scanf("%*[^\n]%*c") != 0) { + printf("ERROR: Reading the input line error.\n"); + return -1; + } + printf("ERROR: Incorrect input.\n"); + } + + return selection; +} + +int show_menu_yes_or_no(const char *title) +{ + const char *names_last[] = { "Yes", "No" }; + return show_menu_linear(title, names_last, 2); +} \ No newline at end of file diff --git a/test/testsuites/common/testsuite_common/mv_testsuite_common.h b/test/testsuites/common/testsuite_common/mv_testsuite_common.h index 1a5c2cd..66876e0 100644 --- a/test/testsuites/common/testsuite_common/mv_testsuite_common.h +++ b/test/testsuites/common/testsuite_common/mv_testsuite_common.h @@ -142,6 +142,13 @@ int show_menu( const char **names, int number_of_option); +int show_menu_linear( + const char *title, + const char **menu, + size_t len_menu); + +int show_menu_yes_or_no(const char *title); + /** * @brief Loads media 
source from JPEG image. * diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index b838945..24e1f03 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -328,30 +328,6 @@ void _image_classified_cb(mv_source_h source, const int number_of_classes, } } -int show_menu(const char *title, const int *options, const char **names, - int cnt) -{ - printf("*********************************************\n"); - printf("* %38s *\n", title); - printf("*-------------------------------------------*\n"); - int i = 0; - for (i = 0; i < cnt; ++i) - printf("* %2i. %34s *\n", options[i], names[i]); - - printf("*********************************************\n\n"); - int selection = 0; - printf("Your choice: "); - if (scanf("%20i", &selection) == 0) { - if (scanf("%*[^\n]%*c") != 0) { - printf("ERROR: Reading the input line error.\n"); - return -1; - } - printf("ERROR: Incorrect input.\n"); - } - - return selection; -} - int perform_configure_set_model_config_path(mv_engine_config_h engine_cfg) { int err = MEDIA_VISION_ERROR_NONE; @@ -576,7 +552,6 @@ int perform_configuration(mv_engine_config_h *engine_cfg) int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; const char *names[] = { "Set Model Configuration", "Set Model Weights", "Set Model Data Type", @@ -605,8 +580,7 @@ int perform_configuration(mv_engine_config_h *engine_cfg) } while (sel_opt == 0) { - sel_opt = show_menu("Select Actions: ", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Actions: ", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: err = perform_configure_set_model_config_path(handle); @@ -1371,9 +1345,6 @@ int perform_image_classification() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 
3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19 }; const char *names[] = { "Configuration", "TFLite(cpu + Mobilenet)", "OpenCV(cpu + Squeezenet)", @@ -1399,8 +1370,7 @@ int perform_image_classification() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -1680,13 +1650,8 @@ int perform_image_classification() } sel_opt = 0; - const int options_last[2] = { 1, 2 }; - const char *names_last[2] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = - show_menu("Run Image Classification again?: ", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Image Classification again?: "); switch (sel_opt) { case 1: do_another = 1; @@ -2049,7 +2014,6 @@ int perform_object_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char *names[] = { "Configuration", "TFLITE(CPU) + MobileNetV1+SSD", "OPENCV(CPU) + MobileNetV1+SSD", @@ -2066,8 +2030,7 @@ int perform_object_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -2255,12 +2218,8 @@ int perform_object_detection() } sel_opt = 0; - const int options_last[2] = { 1, 2 }; - const char *names_last[2] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu("Run Object Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Object Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -2563,7 +2522,6 @@ int perform_face_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] 
= { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; const char *names[] = { "Configuration", "TFLite(CPU) + MobileNetV1 + SSD", "OPENCV(CPU) + Resnet10 + SSD", @@ -2579,8 +2537,8 @@ int perform_face_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = + show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -2759,12 +2717,8 @@ int perform_face_detection() } sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu("Run Face Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Face Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -2993,7 +2947,6 @@ int perform_facial_landmark_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; const char *names[] = { "Configuration", "Tflite(CPU) + TweakCNN", "OPENCV(CPU) + TweakCNN", @@ -3008,8 +2961,7 @@ int perform_facial_landmark_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -3174,13 +3126,8 @@ int perform_facial_landmark_detection() } sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu( - "Run Facial Landmark Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Facial Landmark Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -3360,7 +3307,6 @@ int perform_pose_landmark_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 
5, 6, 7 }; const char *names[] = { "Configuration", "TFLITE(CPU) + CPM", "Hosted: TFLITE(CPU) + CPM", @@ -3374,8 +3320,7 @@ int perform_pose_landmark_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -3531,13 +3476,8 @@ int perform_pose_landmark_detection() } sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu( - "Run Pose Landmark Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Pose Landmark Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -3582,20 +3522,21 @@ int main() { int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6 }; - const char *names[] = { "Image Classification", "Object Detection", - "Face Detection", "Facial Landmark Detection", - "Pose Landmark Detection", "Exit" }; + const char *names[] = { + "Image Classification", "Object Detection", + "Face Detection", "Facial Landmark Detection", + "Pose Landmark Detection", "Exit" + }; int err = MEDIA_VISION_ERROR_NONE; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { err = perform_image_classification(); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform image classification. ERROR[0x%x]\n", err); + printf("Fail to perform image classification. ERROR[0x%x]\n", + err); } } break; case 2: { @@ -3613,7 +3554,8 @@ int main() case 4: { err = perform_facial_landmark_detection(); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", err); + printf("Fail to perform facial landmark detection. 
ERROR[0x%x]\n", + err); } } break; case 5: { @@ -3634,12 +3576,8 @@ int main() int do_another = 0; sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = - show_menu("Another action?: ", options_last, names_last, 2); + sel_opt = show_menu_yes_or_no("Another action?: "); switch (sel_opt) { case 1: do_another = 1; -- 2.7.4 From 88f1df36b481aa8ef7878062bc3a008e026d6064 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 22 Jul 2021 22:19:11 -0400 Subject: [PATCH 12/16] test: Add movenet for pose detection Change-Id: Iba6c9fe101b0d3f0976aac41fc94ebd332d218e6 Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 52 +++++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 24e1f03..50b19eb 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -190,6 +190,10 @@ #define PLD_LABEL_MOBILENET_V1_POSENET_257_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld_mobilenet_v1_posenet_multi_label.txt" +// https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4 +#define PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_int8_movenet.tflite" + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -3302,6 +3306,39 @@ int perform_hosted_tflite_mobilenet_v1_posenet_257_config(mv_engine_config_h *en return err; } +int perform_hosted_tflite_int8_movenet(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + int perform_pose_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; @@ -3311,6 +3348,7 @@ "TFLITE(CPU) + CPM", "Hosted: TFLITE(CPU) + CPM", "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", + "Hosted: TFLITE(CPU) + INT8_MOVENET", "Prepare", "Run", "Back" }; @@ -3364,6 +3402,16 @@ int perform_pose_landmark_detection() err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); } break; case 5: { + //perform int8 movenet + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_int8_movenet(&engine_cfg); + } break; + case 6: { // create - configure - prepare if (infer) { int err2 = mv_inference_destroy(infer); @@ -3394,7 +3442,7 @@ int perform_pose_landmark_detection() break; } } break; - case 6: { + case 7: { if (mvSource) { int err2 = mv_destroy_source(mvSource); if 
(err2 != MEDIA_VISION_ERROR_NONE) @@ -3441,7 +3489,7 @@ int perform_pose_landmark_detection() unsigned long timeDiff = gettotalmillisec(diffspec); printf("elapsed time : %lu(ms)\n", timeDiff); } break; - case 7: { + case 8: { //perform destroy if (engine_cfg) { err = mv_destroy_engine_config(engine_cfg); -- 2.7.4 From ae8c51996a086998145617eba1f3ac6deebfb165 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Sun, 1 Aug 2021 21:28:41 -0400 Subject: [PATCH 13/16] test: Remove test while loop While loop keep previous test context which contains allocated resource. This resource check only increase code not usability. Change-Id: I1f2bfdd17fd6305d313d98362c4549b82f214d62 Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 105 ++++++--------------- 1 file changed, 30 insertions(+), 75 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 50b19eb..5053cea 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -3568,79 +3568,34 @@ int perform_pose_landmark_detection() int main() { - int sel_opt = 0; - - const char *names[] = { - "Image Classification", "Object Detection", - "Face Detection", "Facial Landmark Detection", - "Pose Landmark Detection", "Exit" - }; - int err = MEDIA_VISION_ERROR_NONE; - while (sel_opt == 0) { - sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - err = perform_image_classification(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform image classification. ERROR[0x%x]\n", - err); - } - } break; - case 2: { - err = perform_object_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform object detection. 
ERROR[0x%x]\n", err); - } - } break; - case 3: { - err = perform_face_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform face detection. ERROR[0x%x]\n", err); - } - } break; - case 4: { - err = perform_facial_landmark_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", - err); - } - } break; - case 5: { - err = perform_pose_landmark_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform pose landmark detection"); - } - } break; - case 6: { - printf("Exit"); - } break; - default: - printf("Invalid option"); - sel_opt = 0; - continue; - } - - int do_another = 0; - - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Another action?: "); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 0 : 1; - } - - return 0; -} + const char *names[] = { "Image Classification", "Object Detection", + "Face Detection", "Facial Landmark Detection", + "Pose Landmark Detection" }; + + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; + } + switch (sel_opt) { + case 1: { + err = perform_image_classification(); + } break; + case 2: { + err = perform_object_detection(); + } break; + case 3: { + err = perform_face_detection(); + } break; + case 4: { + err = perform_facial_landmark_detection(); + } break; + case 5: { + err = perform_pose_landmark_detection(); + } break; + } + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to perform task. 
ERROR[0x%x]\n", err); return err; } \ No newline at end of file -- 2.7.4 From 0a7ab1f50e641c4937d2ca87ab0950067688860b Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Mon, 2 Aug 2021 03:37:46 -0400 Subject: [PATCH 14/16] test: Separate resource allocation perform_pose_landmark_detection allocates mv_source, mv_engine, and mv_infer in a single function, which makes it hard to deallocate the resources and handle errors. Change-Id: I80df947730b734e9cb4f95bc238d66cccbe0ca7d Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 559 ++++++++------------- 1 file changed, 212 insertions(+), 347 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 5053cea..e2df147 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -194,6 +194,12 @@ #define PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld_int8_movenet.tflite" +#define TASK_IC 0 +#define TASK_OD 1 +#define TASK_FD 2 +#define TASK_FLD 3 +#define TASK_PLD 4 + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -332,6 +338,113 @@ void _image_classified_cb(mv_source_h source, const int number_of_classes, } } +int infer_task_with_img(char *img_file_name, mv_inference_h infer, int task_id) +{ + mv_source_h mvSource = NULL; + struct timespec s_tspec; + struct timespec e_tspec; + + int err = mv_create_source(&mvSource); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create mvSource.\n"); + return err; + } + + err = load_mv_source_from_file(img_file_name, mvSource); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to load mvSource err: %d.\n", err); + return err; + } + + clock_gettime(CLOCK_MONOTONIC, &s_tspec); + + switch (task_id) { + case TASK_IC: + err = mv_inference_image_classify(mvSource, infer, NULL, + _image_classified_cb, NULL); + break; + case TASK_OD: + err = mv_inference_object_detect(mvSource, infer, _object_detected_cb, + NULL); + break; + case TASK_FD: + err = mv_inference_face_detect(mvSource, infer, _face_detected_cb, + NULL); + break; + case TASK_FLD: + err = mv_inference_facial_landmark_detect( + mvSource, infer, NULL, _facial_landmark_detected_cb, NULL); + break; + case TASK_PLD: + err = mv_inference_pose_landmark_detect( + mvSource, infer, NULL, _pose_landmark_detected_cb, NULL); + break; + default: + err = MEDIA_VISION_ERROR_INVALID_PARAMETER; + break; + } + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to infer task [err:%i]\n", err); + + clock_gettime(CLOCK_MONOTONIC, &e_tspec); + struct timespec diffspec = diff(s_tspec, e_tspec); + unsigned long timeDiff = gettotalmillisec(diffspec); + printf("elapsed time : %lu(ms)\n", timeDiff); + + err = mv_destroy_source(mvSource); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy mvSource [err:%i]\n", err); + + return err; +} + +int infer_task(mv_inference_h infer, int task_id) +{ + char *in_file_name = NULL; + /* Load media source */ + while (input_string("Input file name to be inferred:", 1024, + &(in_file_name)) == -1) + printf("Incorrect input! 
Try again.\n"); + + int err = infer_task_with_img(in_file_name, infer, task_id); + free(in_file_name); + return err; +} + +int mv_inference_task_helper(mv_engine_config_h engine_cfg, int task_id) +{ + mv_inference_h infer = NULL; + + int err = mv_inference_create(&infer); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create inference handle [err:%i]\n", err); + return err; + } + + err = mv_inference_configure(infer, engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to configure inference handle [err:%i]\n", err); + goto clean_mv_inference; + } + + err = mv_inference_prepare(infer); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to prepare inference handle"); + goto clean_mv_inference; + } + + err = infer_task(infer, task_id); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to infer task"); + } + +clean_mv_inference: + err = mv_inference_destroy(infer); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy inference handle [err:%i]\n", err); + return err; +} + int perform_configure_set_model_config_path(mv_engine_config_h engine_cfg) { int err = MEDIA_VISION_ERROR_NONE; @@ -3172,398 +3285,150 @@ int perform_facial_landmark_detection() return MEDIA_VISION_ERROR_NONE; } -int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg) +int engine_config_hosted_tflite_cpu(mv_engine_config_h handle, + const char *tf_weight) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + int err = mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - const char *inputNodeName = "image"; - const char *outputNodeName[] = { 
"Convolutional_Pose_Machine/stage_5_out" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, PLD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 192); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 192); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_cpm_192_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - mv_engine_config_set_string_attribute( - 
handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - PLD_TFLITE_WEIGHT_CPM_192_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; + err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); return err; } -int perform_hosted_tflite_mobilenet_v1_posenet_257_config(mv_engine_config_h *engine_cfg) +int perform_armnn_cpm_config(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; + const char *inputNodeName = "image"; + const char *outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" }; - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + int err = engine_config_hosted_tflite_cpu(handle, PLD_TFLITE_WEIGHT_PATH); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } -/* - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - PLD_LABEL_MOBILENET_V1_POSENET_257_PATH); -*/ - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); + err = mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - 
MV_INFERENCE_TARGET_CPU); + err = mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - *engine_cfg = handle; - return err; -} + err = mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } -int perform_hosted_tflite_int8_movenet(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; + err = mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); + err = mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - *engine_cfg = handle; + err = mv_engine_config_set_array_string_attribute( + handle, 
MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); return err; } int perform_pose_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const char *names[] = { "Configuration", - "TFLITE(CPU) + CPM", - "Hosted: TFLITE(CPU) + CPM", - "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", - "Hosted: TFLITE(CPU) + INT8_MOVENET", - "Prepare", - "Run", - "Back" }; - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; + const char *names[] = { + "TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", + "Hosted: TFLITE(CPU) + INT8_MOVENET", + }; - while (sel_opt == 0) { - sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_armnn_cpm_config(&engine_cfg); - } break; - case 3: { - //perform cpm config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_cpm_192_config(&engine_cfg); - } break; - case 4: { - //perform mobilenet-v1 posenet config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); - } break; - case 5: { - 
//perform int8 movenet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); - } break; - case 6: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 7: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! 
Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource err: %d.\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_pose_landmark_detect( - mvSource, infer, NULL, _pose_landmark_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 8: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code: %i\n", err); - } - - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Run Pose Landmark Detection again?:"); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = 
(do_another == 1) ? 0 : 1; + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; } - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; + err = mv_create_engine_config(&engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + return err; } - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; + switch (sel_opt) { + case 1: { //perform TweakCNN config + err = perform_armnn_cpm_config(engine_cfg); + } break; + case 2: { //perform cpm config + err = engine_config_hosted_tflite_cpu(engine_cfg, + PLD_TFLITE_WEIGHT_CPM_192_PATH); + } break; + case 3: { //perform mobilenet-v1 posenet config + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + } break; + case 4: { //perform int8 movenet + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); + } break; } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to perform config [err:%i]\n", err); + goto clean_pose_engine; } - return MEDIA_VISION_ERROR_NONE; + err = mv_inference_task_helper(engine_cfg, TASK_PLD); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to detect with engine [err:%i]\n", err); + +clean_pose_engine: + err = mv_destroy_engine_config(engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err); + + return err; } int main() -- 2.7.4 From aa3185f2252bdebbfbf48cc3dab3d4c97f54aa7c Mon Sep 17 
00:00:00 2001 From: Kwang Son Date: Tue, 3 Aug 2021 21:42:19 -0400 Subject: [PATCH 15/16] test: Cleanup perform_facial_landmark_detection Based on commit 0a7ab1f50e641c4937d2ca87ab0950067688860b: allocating mv_source, mv_engine, and mv_infer in a single function makes it hard to deallocate resources and handle errors. Change-Id: Ib242154ec33602a8a9788203cabc34e37e5f878d Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 473 ++++----------------- 1 file changed, 90 insertions(+), 383 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index e2df147..5fa02f2 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -200,6 +200,15 @@ #define TASK_FLD 3 #define TASK_PLD 4 +#define RET_IF_FAIL(exp) \ + do { \ + int err = (exp); \ + if (err != MEDIA_VISION_ERROR_NONE) { \ + printf("[%s] %s failed\n", __func__, #exp); \ + return err; \ + } \ + } while (0) + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -2876,413 +2885,111 @@ int perform_face_detection() return MEDIA_VISION_ERROR_NONE; } -int perform_tflite_TweakCNN(mv_engine_config_h *engine_cfg) +int perform_tflite_TweakCNN(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - const char *inputNodeName = "INPUT_TENSOR_NAME"; const char *outputNodeName[] = { "OUTPUT_TENSOR_NAME" }; - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 128); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 128); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; + RET_IF_FAIL( + engine_config_hosted_tflite_cpu(handle, FLD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, 
MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); + return MEDIA_VISION_ERROR_NONE; } -int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg) +int perform_opencv_cnncascade(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - const char *inputNodeName = "data"; const char *outputNodeName[] = { "Sigmoid_fc2" }; - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_OPENCV_WEIGHT_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + 
RET_IF_FAIL(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - FLD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 128); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 128); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - - -int perform_hosted_tflite_tweakCNN_128_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mediapipe_192_config(mv_engine_config_h *engine_cfg) -{ - int 
err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; + FLD_OPENCV_CONFIG_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); + return MEDIA_VISION_ERROR_NONE; } int perform_facial_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const char *names[] = { "Configuration", - "Tflite(CPU) + TweakCNN", - "OPENCV(CPU) + 
TweakCNN", - "Hosted: TFLite(TweakCNN)", - "Hosted: TFLite(MediaPipe)", - "Prepare", - "Run", - "Back" }; - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform SRID TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_tflite_TweakCNN(&engine_cfg); - } break; - case 3: { - //perform CNN cascade - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - err = perform_opencv_cnncascade(&engine_cfg); - } break; - case 4: { - //perform Hosted TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_tweakCNN_128_config(&engine_cfg); - } break; - case 5: { - //perform Hosted MediaPipe config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_mediapipe_192_config(&engine_cfg); - } break; - case 6: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle 
[err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 7: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. 
error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_facial_landmark_detect( - mvSource, infer, NULL, _facial_landmark_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 8: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } - - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Run Facial Landmark Detection again?:"); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } + const char *names[] = { + "Tflite(CPU) + TweakCNN", + "OPENCV(CPU) + TweakCNN", + "Hosted: TFLite(TweakCNN)", + "Hosted: TFLite(MediaPipe)", + }; - sel_opt = (do_another == 1) ? 
0 : 1; + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; } - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } + RET_IF_FAIL(mv_create_engine_config(&engine_cfg)); - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; + switch (sel_opt) { + case 1: { + err = perform_tflite_TweakCNN(engine_cfg); + } break; + case 2: { + err = perform_opencv_cnncascade(engine_cfg); + } break; + case 3: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH); + } break; + case 4: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH); + } break; } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to perform config [err:%i]\n", err); + goto clean_facial_landmark_engine; } - return MEDIA_VISION_ERROR_NONE; + RET_IF_FAIL(mv_inference_task_helper(engine_cfg, TASK_FLD)); + +clean_facial_landmark_engine: + RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); + return err; } int engine_config_hosted_tflite_cpu(mv_engine_config_h handle, -- 2.7.4 From 9b045d0c27ad4dd8b5a51e90dd1bdefb90b1b4c1 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Wed, 4 Aug 2021 03:06:26 -0400 Subject: [PATCH 16/16] test: Cleanup perform_face_detection Based on commit 0a7ab1f50e641c4937d2ca87ab0950067688860b: allocating mv_source, mv_engine, and mv_infer in a single function makes it hard to deallocate resources and handle errors. 
Change-Id: Iaf1124821e39c7785955bc9b06a49b5997bb0430 Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 741 +++++---------------- 1 file changed, 175 insertions(+), 566 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 5fa02f2..b47547e 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -2386,503 +2386,159 @@ int perform_object_detection() return MEDIA_VISION_ERROR_NONE; } -int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg) +int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "normalized_input_image_tensor"; - const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - 
mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_opencv_resnet10ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "data"; - const char *outputNodeName[] = { "detection_out" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - FD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 135.7); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - 
MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - const char *inputNodeName = "normalized_input_image_tensor"; const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - 
mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mobilenetv1ssd_300_config_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_blazeface_128_config_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - 
FD_TFLITE_WEIGHT_BLAZEFACE_128_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_face_detection() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const char *names[] = { "Configuration", - "TFLite(CPU) + MobileNetV1 + SSD", - "OPENCV(CPU) + Resnet10 + SSD", - "ARMNN(CPU) + MobileNetV1 + SSD", - "Hosted: TFLite(cpu + MobilenetV1+SSD)", - "Hosted: TFLite(cpu + BlazeFace)", - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = - show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform TFlit Mobilenetv1ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_tflite_mobilenetv1ssd_face(&engine_cfg); - } break; - case 3: { - //perform Opencv resenet10ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_opencv_resnet10ssd_face(&engine_cfg); - } break; - case 4: { - //perform Armnn Mobilenetv1ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err 
= perform_armnn_mobilenetv1ssd_face(&engine_cfg); - } break; - case 5: { - //perform hosted TFlite Mobilenetv1ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_hosted_tflite_mobilenetv1ssd_300_config_face(&engine_cfg); - } break; - case 6: { - //perform hosted TFlite blazeface config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_hosted_tflite_blazeface_128_config_face(&engine_cfg); - } break; - case 7: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 8: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! 
Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_face_detect(mvSource, infer, _face_detected_cb, - NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); + RET_IF_FAIL(engine_config_hosted_tflite_cpu(handle, FD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4)); + return MEDIA_VISION_ERROR_NONE; +} - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 9: { - 
//perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } +int perform_opencv_resnet10ssd_face(mv_engine_config_h handle) +{ + const char *inputNodeName = "data"; + const char *outputNodeName[] = { "detection_out" }; - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FD_OPENCV_WEIGHT_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, + FD_OPENCV_CONFIG_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 135.7)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); 
+ return MEDIA_VISION_ERROR_NONE; +} - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } +int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h handle) +{ + const char *inputNodeName = "normalized_input_image_tensor"; + const char *outputNodeName[] = { "TFLite_Detection_PostProcess", + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_ARMNN)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, 
MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4)); + return MEDIA_VISION_ERROR_NONE; +} - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Run Face Detection again?:"); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } +int perform_face_detection() +{ + int err = MEDIA_VISION_ERROR_NONE; + mv_engine_config_h engine_cfg = NULL; + const char *names[] = { + "TFLite(CPU) + MobileNetV1 + SSD", + "OPENCV(CPU) + Resnet10 + SSD", + "ARMNN(CPU) + MobileNetV1 + SSD", + "Hosted: TFLite(cpu + MobilenetV1+SSD)", + "Hosted: TFLite(cpu + BlazeFace)", + }; - sel_opt = (do_another == 1) ? 0 : 1; + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; } - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } + RET_IF_FAIL(mv_create_engine_config(&engine_cfg)); - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; + switch (sel_opt) { + case 1: { + err = perform_tflite_mobilenetv1ssd_face(engine_cfg); + } break; + case 2: { + err = perform_opencv_resnet10ssd_face(engine_cfg); + } break; + case 3: { + err = perform_armnn_mobilenetv1ssd_face(engine_cfg); + } break; + case 4: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); + } break; + case 5: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FD_TFLITE_WEIGHT_BLAZEFACE_128_PATH); + } break; } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; + if (err != 
MEDIA_VISION_ERROR_NONE) { + printf("Fail to perform config [err:%i]\n", err); + goto clean_face_engine; } - return MEDIA_VISION_ERROR_NONE; + err = mv_inference_task_helper(engine_cfg, TASK_FD); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to detect with engine [err:%i]\n", err); + +clean_face_engine: + RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); + return err; } int perform_tflite_TweakCNN(mv_engine_config_h handle) @@ -2985,7 +2641,9 @@ int perform_facial_landmark_detection() goto clean_facial_landmark_engine; } - RET_IF_FAIL(mv_inference_task_helper(engine_cfg, TASK_FLD)); + err = mv_inference_task_helper(engine_cfg, TASK_FLD); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to detect with engine [err:%i]\n", err); clean_facial_landmark_engine: RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); @@ -2995,21 +2653,13 @@ clean_facial_landmark_engine: int engine_config_hosted_tflite_cpu(mv_engine_config_h handle, const char *tf_weight) { - int err = mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - return err; + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + return MEDIA_VISION_ERROR_NONE; } int perform_armnn_cpm_config(mv_engine_config_h handle) @@ -3017,68 +2667,29 @@ int perform_armnn_cpm_config(mv_engine_config_h handle) const char *inputNodeName = "image"; const char 
*outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" }; - int err = engine_config_hosted_tflite_cpu(handle, PLD_TFLITE_WEIGHT_PATH); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - return err; + RET_IF_FAIL( + engine_config_hosted_tflite_cpu(handle, PLD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + 
RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); + return MEDIA_VISION_ERROR_NONE; } int perform_pose_landmark_detection() @@ -3105,18 +2716,18 @@ int perform_pose_landmark_detection() } switch (sel_opt) { - case 1: { //perform TweakCNN config + case 1: { err = perform_armnn_cpm_config(engine_cfg); } break; - case 2: { //perform cpm config + case 2: { err = engine_config_hosted_tflite_cpu(engine_cfg, PLD_TFLITE_WEIGHT_CPM_192_PATH); } break; - case 3: { //perform mobilenet-v1 posenet config + case 3: { err = engine_config_hosted_tflite_cpu( engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); } break; - case 4: { //perform int8 movenet + case 4: { err = engine_config_hosted_tflite_cpu( engine_cfg, PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); } break; @@ -3131,9 +2742,7 @@ int perform_pose_landmark_detection() printf("Fail to detect with engine [err:%i]\n", err); clean_pose_engine: - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); + RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); 
return err; } -- 2.7.4