From a5b92ea0bc8bebd388caeb92cf1c10041c46b158 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 20 May 2021 09:57:02 +0900 Subject: [PATCH 01/16] Add PoseDecoder and Landmark to decode various type of pose output tensor Change-Id: I8be806ff3522aec1f7026912b8c317055e9e16db Signed-off-by: Tae-Young Chung --- .../mv_inference/inference/include/Landmark.h | 53 +++ .../mv_inference/inference/include/PoseDecoder.h | 95 ++++ .../mv_inference/inference/src/PoseDecoder.cpp | 483 +++++++++++++++++++++ 3 files changed, 631 insertions(+) create mode 100644 mv_machine_learning/mv_inference/inference/include/Landmark.h create mode 100644 mv_machine_learning/mv_inference/inference/include/PoseDecoder.h create mode 100644 mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h new file mode 100644 index 0000000..63ccf60 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_LANDMARK_H__ +#define __MEDIA_VISION_LANDMARK_H__ + +#include +#include +#include +#include + +/** + * @file Landmark.h + * @brief This file contains the Landmark class definition which + * provides landmark information. 
+ */ + +namespace mediavision +{ +namespace inference +{ + typedef struct _LandmarkPoint + { + float score; + cv::Point heatMapLoc; + cv::Point2f decodedLoc; + int id; + bool valid; + } LandmarkPoint; + + typedef struct _LandmarkResults + { + std::vector landmarks; + float score; + } LandmarkResults; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_LANDMARK_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h new file mode 100644 index 0000000..c910d62 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_POSEDECODER_H__ +#define __MEDIA_VISION_POSEDECODER_H__ + +#include +#include +#include +#include + + +#include "TensorBuffer.h" +#include "OutputMetadata.h" +#include "Landmark.h" + +/** + * @file PoseDecoder.h + * @brief This file contains the PoseDecoder class definition which + * provides pose decoder. 
+ */ + +namespace mediavision +{ +namespace inference +{ + class PoseDecoder + { + private: + TensorBuffer mTensorBuffer; + OutputMetadata mMeta; + int mHeatMapWidth; + int mHeatMapHeight; + int mHeatMapChannel; + int mNumberOfLandmarks; + + std::list mCandidates; + std::vector mPoseLandmarks; + + int getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH); + int getPosToIndex(LandmarkPoint& landmark); + int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal); + int findPose(LandmarkPoint& root, std::vector& decodedLandmarks, + float scaleW, float scaleH); + int traverseToNeighbor(int edgeId, int toId, int dir, + LandmarkPoint fromLandmark, LandmarkPoint& toLandmark, + float scaleW, float scaleH); + int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector); + + int convertXYZtoX(int x, int y, int c); + + cv::Point convertXYZtoXY(int x, int y, int c); + + public: + PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData, + int heatMapWidth, int heatMapHeight, int heatMapChannel, + int numberOfLandmarks) : + mTensorBuffer(buffer), + mHeatMapWidth(heatMapWidth), + mHeatMapHeight(heatMapHeight), + mHeatMapChannel(heatMapChannel), + mNumberOfLandmarks(numberOfLandmarks) { + mMeta = metaData; + }; + + ~PoseDecoder() = default; + + int init(); + + int decode(float scaleWidth, float scaleHeight, float thresHoldRadius); + + int getNumberOfPose(); + + float getPointX(int poseIdx, int partIdx); + float getPointY(int poseIdx, int partIdx); + float getScore(int poseIdx, int partIdx); + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_POSEDECODER_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp new file mode 100644 index 0000000..f30fbf9 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -0,0 +1,483 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights 
Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "PoseDecoder.h" +#include "PostProcess.h" + +#include +#include +#include + +#define MAX_NUMBER_OF_POSE 5 +#define MAX_NUMBER_OF_CORRECTION 3 + +namespace mediavision +{ +namespace inference +{ + int PoseDecoder::convertXYZtoX(int x, int y, int c) + { + return y * mHeatMapWidth * mHeatMapChannel + + x * mHeatMapChannel + + c; + } + + cv::Point PoseDecoder::convertXYZtoXY(int x, int y, int c) + { + int idxY = y * mHeatMapWidth * mHeatMapChannel * 2 + + x * mHeatMapChannel * 2 + + c; + + int idxX = idxY + mHeatMapChannel; + + return cv::Point(idxX, idxY); + } + + int PoseDecoder::init() + { + LOGI("ENTER"); + + Landmark& landmarkInfo = mMeta.GetLandmark(); + + if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) { + LOGE("Not supported landmark type"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + if (landmarkInfo.GetDecodingType() == 0) { + LOGI("Skip init"); + return MEDIA_VISION_ERROR_NONE; + } + + int x,y,c; + int sx, sy, ex, ey, dx, dy; + float score, localScore; + int idx; + bool isLocalMax; + ScoreInfo& scoreInfo = mMeta.GetScore(); + + mCandidates.clear(); + + if (landmarkInfo.GetType() == 0 || + landmarkInfo.GetType() == 2) { + mCandidates.resize(mHeatMapChannel); + } + + for (y = 0; y < mHeatMapHeight; ++y) { + for (x = 0; x < mHeatMapWidth; ++x) { + std::list::iterator candidate = mCandidates.begin(); + for (c = 0; c < 
mHeatMapChannel; ++c, candidate++) { + isLocalMax = true; + idx = convertXYZtoX(x, y, c); + score = mTensorBuffer.getValue(scoreInfo.GetName(), idx); + if (scoreInfo.GetType() == 1) { + score = PostProcess::sigmoid(score); + } + + if (score < scoreInfo.GetThresHold()) + continue; + + if (landmarkInfo.GetType() == 0 || + landmarkInfo.GetType() == 2) { + if (score <= candidate->score) + continue; + + candidate->score = score; + candidate->heatMapLoc.x = x; + candidate->heatMapLoc.y = y; + candidate->id = c; + + } else { //landmarkInfo.type == 1 + sx = std::max(x - 1, 0); + sy = std::max(y - 1, 0); + ex = std::min(x + 2, mHeatMapWidth); + ey = std::min(y + 2, mHeatMapHeight); + + for (dy = sy; dy < ey; ++dy) { + for (dx = sx; dx < ex; ++dx) { + idx = convertXYZtoX(dx, dy, c); + localScore = mTensorBuffer.getValue(scoreInfo.GetName(), idx); + if (scoreInfo.GetType() == 1) { + localScore = PostProcess::sigmoid(localScore); + } + if (localScore > score) { + isLocalMax = false; + break; + } + } + if (isLocalMax == false) + break; + } + + if (isLocalMax == false) + continue; + + // add this to list + LOGI("[%d x %d][%d]: score %.3f", y, x, c, score); + std::list::iterator iter; + for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) { + if ((*iter).score < score) { + break; + } + } + + LandmarkPoint localLandmark; + localLandmark.score = score; + localLandmark.heatMapLoc.x = x; + localLandmark.heatMapLoc.y = y; + localLandmark.id = c; + localLandmark.valid = false; + mCandidates.insert(iter, localLandmark); + } + } + } + } // end of init + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::getNumberOfPose() + { + return std::min(static_cast(mPoseLandmarks.size()), MAX_NUMBER_OF_POSE); + } + + int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal) + { + if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) { + offsetVal.x = offsetVal.y = 0.f; + LOGI("No offset value"); + LOGI("LEAVE"); + return 
MEDIA_VISION_ERROR_NONE; + } + + cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id); + + try { + offsetVal.x = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.x); + offsetVal.y = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.y); + } catch (const std::exception& e) { + LOGE("Fail to get value at (%d, %d) from %s", + idx.x, idx.y, mMeta.GetOffset().GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + return MEDIA_VISION_ERROR_NONE; + } + + float PoseDecoder::getPointX(int poseIdx, int partIdx) + { + LOGI("idx[%d]-part[%d]", poseIdx, partIdx); + return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x; + } + + float PoseDecoder::getPointY(int poseIdx, int partIdx) + { + LOGI("idx[%d]-part[%d]", poseIdx, partIdx); + return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y; + } + + float PoseDecoder::getScore(int poseIdx, int partIdx) + { + return mPoseLandmarks[poseIdx].landmarks[partIdx].score; + } + + int PoseDecoder::getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH) + { + if (scaleW <= 0.0f || scaleH <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + cv::Point2f offsetVal; + getOffsetValue(point, offsetVal); + + point.decodedLoc.x = static_cast(point.heatMapLoc.x) / static_cast(mHeatMapWidth - 1); + point.decodedLoc.y = static_cast(point.heatMapLoc.y) / static_cast(mHeatMapHeight - 1); + + point.decodedLoc.x += offsetVal.x / scaleW; + point.decodedLoc.y += offsetVal.y / scaleH; + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::getPosToIndex(LandmarkPoint& point) + { + cv::Point posVal; + + posVal.x = roundf(point.decodedLoc.x * static_cast(mHeatMapWidth - 1)); + posVal.y = roundf(point.decodedLoc.y * static_cast(mHeatMapHeight - 1)); + + posVal.x = std::max(std::min(posVal.x, mHeatMapWidth - 1), 0); + posVal.y = std::max(std::min(posVal.y, 
mHeatMapHeight - 1), 0); + + point.heatMapLoc = posVal; + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::decode(float scaleWidth, float scaleHeight, float thresHoldRadius) + { + LOGI("ENTER"); + + if (scaleWidth <= 0.0f || scaleHeight <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleWidth, scaleHeight); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + mPoseLandmarks.clear(); + LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false}; + + Landmark& landmarkInfo = mMeta.GetLandmark(); + ScoreInfo& scoreInfo = mMeta.GetScore(); + + if (landmarkInfo.GetType() == 0 || + landmarkInfo.GetType() == 2) { // single pose + mPoseLandmarks.resize(1); + + if (landmarkInfo.GetDecodingType() == 0) { // direct decoding + mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); + } else { // heatmap decoding + mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); + } + } + + if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding + while (!mCandidates.empty()) { + + LandmarkPoint &root = mCandidates.front(); + + getIndexToPos(root, scaleWidth, scaleHeight); + + if (landmarkInfo.GetType() == 0) { + root.valid = true; + mPoseLandmarks[0].landmarks[root.id] = root; + mPoseLandmarks[0].score += root.score; + mCandidates.pop_front(); + continue; + } + + LOGI("root id: %d", root.id); + + if (thresHoldRadius > 0.0f) { + bool isSkip = false; + for (auto& result : mPoseLandmarks) { + cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc; + dfRadius -= root.decodedLoc; + float radius = + std::pow(dfRadius.x * scaleWidth, 2.0f) + + std::pow(dfRadius.y * scaleHeight, 2.0f); + LOGI("id[%d], radius: %.f vs. %.f", root.id, radius, std::pow(thresHoldRadius, 2.0f)); + if (radius <= std::pow(thresHoldRadius, 2.0f)) { + LOGI("Not local maximum, Skip this"); + isSkip = true; + break; + } + } + if (isSkip) { + mCandidates.pop_front(); + continue; + } + } + + LOGI("Local maximum. 
Add this"); + + std::vector decodedLandmarks(mHeatMapChannel, initValue); + + findPose(root, decodedLandmarks, scaleWidth, scaleHeight); + + float poseScore = 0.0f; + for (auto& landmark : decodedLandmarks) { + poseScore += landmark.score; + LOGI("%.3f, %.3f", landmark.decodedLoc.x, landmark.decodedLoc.y); + } + + mPoseLandmarks.push_back(LandmarkResults {decodedLandmarks, poseScore}); + if (mPoseLandmarks.size() > MAX_NUMBER_OF_POSE) + break; + mCandidates.pop_front(); + } + + for (auto& pose : mPoseLandmarks) { + pose.score /= static_cast(mHeatMapChannel); + } + } else { + // multi pose is not supported + std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + float poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); + if (scoreInfo.GetType() == 1) { + poseScore = PostProcess::sigmoid(poseScore); + } + if (poseScore < scoreInfo.GetThresHold()) { + LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold()); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 
2 : 3; + if (landmarkInfo.GetDecodingType() == 0) { + landmarkOffset = landmarkInfo.GetOffset(); + } + for (int idx = 0; idx < mNumberOfLandmarks; ++idx) { + float px = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset); + float py = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset + 1); + + mPoseLandmarks[0].landmarks[idx].score = poseScore; + mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1); + mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight); + mPoseLandmarks[0].landmarks[idx].id = idx; + mPoseLandmarks[0].landmarks[idx].valid = true; + + LOGI("idx[%d]: %.4f, %.4f", idx, px, py); + } + + mPoseLandmarks[0].score = poseScore; + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::findPose(LandmarkPoint& root, std::vector& decodedLandmarks, + float scaleW, float scaleH) + { + LOGI("ENTER"); + + if (scaleW <= 0.0f || scaleH <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + decodedLandmarks[root.id] = root; + decodedLandmarks[root.id].valid = true; + LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y); + LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score); + + int index = static_cast(mMeta.GetEdge().GetEdgesAll().size()) - 1; + for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin(); + riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) { + int fromKeyId = riter->second; + int toKeyId = riter->first; + + if (decodedLandmarks[fromKeyId].valid == true && + decodedLandmarks[toKeyId].valid == false) { + LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId); + traverseToNeighbor(index, toKeyId, 1, + decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId], + scaleW, scaleH); + LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId, + 
decodedLandmarks[toKeyId].decodedLoc.x, + decodedLandmarks[toKeyId].decodedLoc.y, + decodedLandmarks[toKeyId].score); + } + index--; + } + + index = 0; + for (auto iter = mMeta.GetEdge().GetEdgesAll().begin(); + iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) { + int fromKeyId = iter->first; + int toKeyId = iter->second; + + if (decodedLandmarks[fromKeyId].valid == true && + decodedLandmarks[toKeyId].valid == false) { + LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId); + traverseToNeighbor(index, toKeyId, 0, + decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId], + scaleW, scaleH); + } + index++; + } + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir, + LandmarkPoint fromLandmark, LandmarkPoint& toLandmark, + float scaleW, float scaleH) + { + if (scaleW <= 0.0f || scaleH <= 0.0f) { + LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + cv::Point2f edgeVector(0.f, 0.f); + cv::Point nearHeatMapLoc; + + LOGI("org: %.4f, %.4f", fromLandmark.decodedLoc.x, fromLandmark.decodedLoc.y); + + // update heatMapLoc from decodedLoc; + nearHeatMapLoc.x = roundf(fromLandmark.decodedLoc.x + * static_cast(mHeatMapWidth - 1)); + nearHeatMapLoc.y = roundf(fromLandmark.decodedLoc.y + * static_cast(mHeatMapHeight - 1)); + + nearHeatMapLoc.x = std::max(std::min(nearHeatMapLoc.x, mHeatMapWidth - 1), 0); + nearHeatMapLoc.y = std::max(std::min(nearHeatMapLoc.y, mHeatMapHeight - 1), 0); + + LOGI("src: %d, %d", nearHeatMapLoc.x, nearHeatMapLoc.y); + + getEdgeVector(nearHeatMapLoc, edgeId, dir, edgeVector); + + LOGI("vector: %.4f, %.4f with edgeId %d", edgeVector.x, edgeVector.y, edgeId); + toLandmark.decodedLoc.x = fromLandmark.decodedLoc.x + edgeVector.x / scaleW; + toLandmark.decodedLoc.y = fromLandmark.decodedLoc.y + edgeVector.y / scaleH; + toLandmark.id = toId; + LOGI("tgt: %.4f, %.4f", toLandmark.decodedLoc.x, 
toLandmark.decodedLoc.y); + + for (int iter = 0; iter < MAX_NUMBER_OF_CORRECTION; ++iter) { + getPosToIndex(toLandmark); + getIndexToPos(toLandmark, scaleW, scaleH); + } + + int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id); + toLandmark.score = mTensorBuffer.getValue(mMeta.GetScore().GetName(), idx); + if (mMeta.GetScore().GetType() == 1) { + toLandmark.score = PostProcess::sigmoid(toLandmark.score); + } + + toLandmark.valid = true; + LOGI("Final: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y); + + return MEDIA_VISION_ERROR_NONE; + } + + int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector) + { + LOGI("ENTER"); + + LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size()); + int idxY = index.y * mHeatMapWidth + * static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2; + idxY += index.x * static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId; + + int idxX = idxY + static_cast(mMeta.GetEdge().GetEdgesAll().size()); + + for(auto& dispVec : mMeta.GetDispVecAll()){ + if (dispVec.GetType() == dir) { // 0: forward + LOGI("%s", dispVec.GetName().c_str()); + vector.x = mTensorBuffer.getValue(dispVec.GetName(), idxX); + vector.y = mTensorBuffer.getValue(dispVec.GetName(), idxY); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } +} +} -- 2.7.4 From 444006a2fd5bf7e0ca38101e71023b847bb38d81 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 24 Jun 2021 14:23:56 +0900 Subject: [PATCH 02/16] Fix printf format Change-Id: I010e68687d67482a05ecd77e1bff4a6b72e27c21 Signed-off-by: Kwang Son --- mv_machine_learning/mv_inference/inference/src/Inference.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 0ce6306..0a20d19 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ 
b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1263,7 +1263,7 @@ namespace inference for (size_t idx = 0; idx < top_results.size(); ++idx) { if (top_results[idx].first < mThreshold) continue; - LOGI("idx:%lu", idx); + LOGI("idx:%zu", idx); LOGI("classIdx: %d", top_results[idx].second); LOGI("classProb: %f", top_results[idx].first); -- 2.7.4 From be22b20edb3472a06ae33f1b11ffe03248113e3e Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 24 Jun 2021 16:20:12 +0900 Subject: [PATCH 03/16] Clean unused package dependency Change-Id: I0308800190cf895893f1592ae5c102799f909fee Signed-off-by: Kwang Son --- packaging/capi-media-vision.spec | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index c180dfa..e2543f2 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,22 +1,20 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.8.2 +Version: 0.8.3 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause Source0: %{name}-%{version}.tar.gz BuildRequires: cmake BuildRequires: pkgconfig(capi-media-tool) -BuildRequires: pkgconfig(libtbm) BuildRequires: pkgconfig(dlog) BuildRequires: pkgconfig(capi-system-info) BuildRequires: pkgconfig(opencv) BuildRequires: pkgconfig(json-glib-1.0) -BuildRequires: dlogutil -BuildRequires: pkgconfig(glib-2.0) BuildRequires: pkgconfig(iniparser) BuildRequires: pkgconfig(inference-engine-interface-common) %if !0%{?ml_only:1} +BuildRequires: pkgconfig(glib-2.0) BuildRequires: pkgconfig(zbar) # Change to the pkgconfig(zint) after zint package refactor BuildRequires: zint -- 2.7.4 From d213865d9e2e8fd6bab3371cf455ba47eec9f779 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 20 May 2021 10:12:54 +0900 Subject: [PATCH 04/16] Add pose landmark detection inference with outputmetadata Support models which require heatmap decoding with additional 
refinement as well as models which just provide landmark results. Change-Id: Ic993510d2655d488ea8a43e08a56c13d2f9bc94f Signed-off-by: Tae-Young Chung --- include/mv_inference_private.h | 10 +- .../mv_inference/inference/include/Inference.h | 2 + .../mv_inference/inference/include/PoseDecoder.h | 3 +- .../mv_inference/inference/src/Inference.cpp | 198 ++++++++++++++++----- .../mv_inference/inference/src/PoseDecoder.cpp | 1 - 5 files changed, 163 insertions(+), 51 deletions(-) diff --git a/include/mv_inference_private.h b/include/mv_inference_private.h index 95dff31..0d8a35e 100644 --- a/include/mv_inference_private.h +++ b/include/mv_inference_private.h @@ -34,11 +34,19 @@ extern "C" { * @since_tizen 6.0 * */ -typedef struct { +typedef struct mv_inference_landmark_s{ bool isAvailable; /**< Availability of landmark */ mv_point_s point; /**< 2D position of landmark */ int label; /**< Label of landmark */ float score; /**< Score of landmark */ + + mv_inference_landmark_s() { + isAvailable = false; + point.x = -1; + point.y = -1; + label = -1; + score = -1.0f; + } } mv_inference_landmark_s; /** diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index 6c88b95..3fea65d 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -32,6 +32,8 @@ #include "PostProcess.h" #include "TensorBuffer.h" +#include "Landmark.h" + #define HUMAN_POSE_MAX_LANDMARKS 16 #define HUMAN_POSE_MAX_PARTS 6 diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h index c910d62..545c385 100644 --- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h +++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h @@ -22,11 +22,12 @@ #include #include - #include "TensorBuffer.h" #include "OutputMetadata.h" #include 
"Landmark.h" +#define MAX_NUMBER_OF_POSE 5 + /** * @file PoseDecoder.h * @brief This file contains the PoseDecoder class definition which diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 0a20d19..94582b3 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -18,7 +18,9 @@ #include "Inference.h" #include "InferenceIni.h" #include "ObjectDecoder.h" +#include "PoseDecoder.h" #include +#include #include #include @@ -471,6 +473,16 @@ namespace inference if (!outputMeta.GetNumber().GetName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName()); + + if (!outputMeta.GetLandmark().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName()); + + if (!outputMeta.GetOffset().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName()); + + for (auto& dispVec : outputMeta.GetDispVecAll()) { + mConfig.mOutputLayerNames.push_back(dispVec.GetName()); + } } inference_engine_layer_property property; @@ -480,6 +492,7 @@ namespace inference INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1}; for (auto& name : mConfig.mOutputLayerNames) { + LOGI("Configure %s layer as output", name.c_str()); property.layers.insert(std::make_pair(name, tensor_info)); } @@ -1665,66 +1678,155 @@ namespace inference int Inference::GetPoseLandmarkDetectionResults( mv_inference_pose_result_h *detectionResults, int width, int height) { - tensor_t outputData; + LOGI("ENTER"); + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + auto& landmarkInfo = outputMeta.GetLandmark(); + auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || + !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + LOGE("output buffers named of %s or %s are NULL", + landmarkInfo.GetName().c_str(), 
scoreInfo.GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } + int heatMapWidth = 0; + int heatMapHeight = 0; + int heatMapChannel = 0; + if (landmarkInfo.GetDecodingType() != 0) { + heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; + heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; + heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); - long number_of_poses = 1; - long number_of_landmarks = inferDimInfo[0][3]; - float *tmp = static_cast(inferResults[0]); - cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + if (mPoseResult == NULL) { + mPoseResult = new(std::nothrow) mv_inference_pose_s; + if (mPoseResult == NULL) { + LOGE("Fail to create result handle"); + return MEDIA_VISION_ERROR_INTERNAL; + } + // 2d+single or 2d+multi or 3d+single or 3d+multi + int defaultNumberOfPose = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 2) ? 1 : MAX_NUMBER_OF_POSE; + std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + + // In case of DecodingType == 0, + // the landmarkChannel is guessed from the shape of the landmark output tensor. + // Otherwise, decoding heatmap, it is guessed from the heatMapChannel. 
+ int landmarkChannel = 0; + if (landmarkInfo.GetDecodingType() == 0) { + landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] + / landmarkInfo.GetOffset(); + } else { + landmarkChannel = heatMapChannel; + } - cv::Point loc; - cv::Point2f loc2f; - double score; - cv::Mat blurredHeatMap; + mPoseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel : + static_cast(mUserListName.size()); - cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), - CV_32FC(inferDimInfo[0][3]), (void *) tmp); + LOGE("number of landmarks per pose: %d", mPoseResult->number_of_landmarks_per_pose ); + mPoseResult->landmarks = new mv_inference_landmark_s* [defaultNumberOfPose]; + for (int idx = 0; idx < defaultNumberOfPose; ++idx) { + mPoseResult->landmarks[idx] = new mv_inference_landmark_s [mPoseResult->number_of_landmarks_per_pose]; + } + } + + // decoding + PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta, + heatMapWidth, heatMapHeight, heatMapChannel, + mPoseResult->number_of_landmarks_per_pose); + + // initialize decorder queue with landmarks to be decoded. + int ret = poseDecoder.init(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init poseDecoder"); + return ret; + } + + float inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); + float inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + float thresRadius = landmarkInfo.GetType() == 0 ? 
0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + poseDecoder.decode(inputW, inputH, thresRadius); + + int part = 0; + mPoseResult->number_of_poses = poseDecoder.getNumberOfPose(); + for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) { + for (int landmarkIndex = 0; landmarkIndex < mPoseResult->number_of_landmarks_per_pose; ++ landmarkIndex) { + part = landmarkIndex; + if (!mUserListName.empty()) { + part = std::stoi(mUserListName[landmarkIndex]) - 1; + if (part < 0) { + continue; + } + } - cv::Mat multiChannels[inferDimInfo[0][3]]; - split(reShapeTest, multiChannels); + mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true; + mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = + poseDecoder.getPointX(poseIndex, part) * static_cast(mSourceSize.width); + mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = + poseDecoder.getPointY(poseIndex, part) * static_cast(mSourceSize.height); + mPoseResult->landmarks[poseIndex][landmarkIndex].label = landmarkIndex; + mPoseResult->landmarks[poseIndex][landmarkIndex].score = + poseDecoder.getScore(poseIndex, part); + } + } + *detectionResults = static_cast(mPoseResult); - float ratioX = static_cast(inferDimInfo[0][2]); - float ratioY = static_cast(inferDimInfo[0][1]); + } else { + tensor_t outputData; - if (mPoseResult == NULL) { - if(!mUserListName.empty()) { - number_of_landmarks = mUserListName.size(); + // Get inference result and contain it to outputData. 
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; } - mPoseResult = new mv_inference_pose_s; + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + long number_of_poses = 1; + long number_of_landmarks = inferDimInfo[0][3]; + float *tmp = static_cast(inferResults[0]); + cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + + cv::Point loc; + cv::Point2f loc2f; + double score; + cv::Mat blurredHeatMap; + + cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), + CV_32FC(inferDimInfo[0][3]), (void *) tmp); + + cv::Mat multiChannels[inferDimInfo[0][3]]; + split(reShapeTest, multiChannels); + + float ratioX = static_cast(inferDimInfo[0][2]); + float ratioY = static_cast(inferDimInfo[0][1]); + if (mPoseResult == NULL) { - LOGE("Fail to create result handle"); - return MEDIA_VISION_ERROR_INTERNAL; - } + if(!mUserListName.empty()) { + number_of_landmarks = mUserListName.size(); + } + mPoseResult = new mv_inference_pose_s; + if (mPoseResult == NULL) { + LOGE("Fail to create result handle"); + return MEDIA_VISION_ERROR_INTERNAL; + } - mPoseResult->number_of_poses= number_of_poses; - mPoseResult->number_of_landmarks_per_pose = number_of_landmarks; - mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses]; - for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { - mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks]; - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; ++landmarkIndex) { - mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = false; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].score = -1.0f; + 
mPoseResult->number_of_poses= number_of_poses; + mPoseResult->number_of_landmarks_per_pose = number_of_landmarks; + mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses]; + for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { + mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks]; } } - } - int part = 0; - for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { + int part = 0; + for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { part = landmarkIndex; if (!mUserListName.empty()) { part = std::stoi(mUserListName[landmarkIndex]) - 1; @@ -1748,11 +1850,11 @@ namespace inference mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast(static_cast(height) * loc2f.y); mPoseResult->landmarks[poseIndex][landmarkIndex].score = score; mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; + } } + *detectionResults = static_cast(mPoseResult); } - *detectionResults = static_cast(mPoseResult); - return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index f30fbf9..271f068 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -22,7 +22,6 @@ #include #include -#define MAX_NUMBER_OF_POSE 5 #define MAX_NUMBER_OF_CORRECTION 3 namespace mediavision -- 2.7.4 From c50b5d925d4345d57683e6a382e3e269e292568a Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Thu, 20 May 2021 10:30:02 +0900 Subject: [PATCH 05/16] Update testsuite to run pose landmark models with meta files Change-Id: I62307b534309ecbbf0c4e97ff2029e62fbdaea44 Signed-off-by: Tae-Young Chung --- meta-template/pld_cpm_192x192.json | 51 
++++++++ .../pld_mobilenet_v1_posenet_multi_257x257.json | 142 +++++++++++++++++++++ .../inference/inference_test_suite.c | 111 +++++++++++++++- 3 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 meta-template/pld_cpm_192x192.json create mode 100644 meta-template/pld_mobilenet_v1_posenet_multi_257x257.json diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json new file mode 100644 index 0000000..6a81e50 --- /dev/null +++ b/meta-template/pld_cpm_192x192.json @@ -0,0 +1,51 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "image", + "shape_type" : 1, + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Convolutional_Pose_Machine/stage_5_out", + "index" : [-1, 1, 1, 1], + "top_number" : 1 , + "threshold" : 0.3, + "score_type" : 0 + } + ], + "landmark" : [ + { + "name" : "Convolutional_Pose_Machine/stage_5_out", + "index" : [-1, 1, 1, 1], + "landmark_type" : 0, + "decoding_type" : 1, + "decoding_info" : + { + "heatmap" : + { + "shape_type": 1 + } + } + } + ] + } +} diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json new file mode 100644 index 0000000..93084ce --- /dev/null +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -0,0 +1,142 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "sub_2", + "shape_type" : 1, + "shape_dims" : [ 1, 257, 257, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [255.0, 255.0, 255.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "MobilenetV1/heatmap_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "top_number" : 1, + "threshold" : 0.65, + 
"score_type" : 1 + } + ], + "landmark" : [ + { + "name" : "MobilenetV1/heatmap_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "landmark_type" : 1, + "decoding_type" : 2, + "decoding_info" : + { + "heatmap" : + { + "shape_type" : 1, + "nms_radius" : 50.0 + } + } + } + ], + "offset" : [ + { + "name" : "MobilenetV1/offset_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "shape_type" : 1, + "dim_type" : 2 + } + ], + "displacement" : [ + { + "name" : "MobilenetV1/displacement_fwd_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "shape_type" : 1, + "dim_type" : 2, + "type" : 0 + }, + { + "name" : "MobilenetV1/displacement_bwd_2/BiasAdd", + "index" : [-1, 1, 1, 1], + "shape_type" : 1, + "dim_type" : 2, + "type" : 1 + } + ], + "edgemap" : [ + { + "parent": 0, + "child": 1 + }, + { + "parent": 1, + "child": 3 + }, + { + "parent": 0, + "child": 2 + }, + { + "parent": 2, + "child": 4 + }, + { + "parent": 0, + "child": 5 + }, + { + "parent": 5, + "child": 7 + }, + { + "parent": 7, + "child": 9 + }, + { + "parent": 5, + "child": 11 + }, + { + "parent": 11, + "child": 13 + }, + { + "parent": 13, + "child": 15 + }, + { + "parent": 0, + "child": 6 + }, + { + "parent": 6, + "child": 8 + }, + { + "parent": 8, + "child": 10 + }, + { + "parent": 6, + "child": 12 + }, + { + "parent": 12, + "child": 14 + }, + { + "parent": 14, + "child": 16 + } + ] + } +} diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 877e022..579e9f1 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -171,6 +171,17 @@ #define PLD_MOTION_CAPTURE_MAPPING_FILE_PATH \ "/usr/share/capi-media-vision/models/PLD/mocap/example-mocap-mapping.txt" +/* + * Hosted models + */ +#define PLD_TFLITE_WEIGHT_CPM_192_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_cpm_192x192.tflite" + +#define PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH 
\ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_mobilenet_v1_posenet_multi_257x257.tflite" +#define PLD_LABEL_MOBILENET_V1_POSENET_257_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_mobilenet_v1_posenet_multi_label.txt" + /****** * Public model: * IC: mobilenet caffe, tf? @@ -3180,15 +3191,87 @@ int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg) return err; } +int perform_hosted_tflite_cpm_192_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PLD_TFLITE_WEIGHT_CPM_192_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_mobilenet_v1_posenet_257_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + +/* + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + 
PLD_LABEL_MOBILENET_V1_POSENET_257_PATH); +*/ + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} int perform_pose_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5 }; + const int options[] = { 1, 2, 3, 4, 5, 6, 7 }; const char *names[] = { "Configuration", "TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", "Prepare", "Run", "Back" }; @@ -3213,7 +3296,7 @@ int perform_pose_landmark_detection() err = perform_configuration(&engine_cfg); } break; case 2: { - //perform SRID TweakCNN config + //perform TweakCNN config if (engine_cfg) { int err2 = mv_destroy_engine_config(engine_cfg); if (err2 != MEDIA_VISION_ERROR_NONE) @@ -3223,6 +3306,26 @@ int perform_pose_landmark_detection() err = perform_armnn_cpm_config(&engine_cfg); } break; case 3: { + //perform cpm config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_cpm_192_config(&engine_cfg); + } break; + case 4: { + //perform mobilenet-v1 posenet config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); + } break; + case 5: { // create - configure - prepare if (infer) { int err2 = mv_inference_destroy(infer); @@ -3253,7 +3356,7 @@ int perform_pose_landmark_detection() break; } } break; - case 4: { + case 6: { if (mvSource) { int err2 = mv_destroy_source(mvSource); if (err2 != MEDIA_VISION_ERROR_NONE) @@ -3300,7 +3403,7 @@ int 
perform_pose_landmark_detection() unsigned long timeDiff = gettotalmillisec(diffspec); printf("elapsed time : %lu(ms)\n", timeDiff); } break; - case 5: { + case 7: { //perform destroy if (engine_cfg) { err = mv_destroy_engine_config(engine_cfg); -- 2.7.4 From 2f2fcec8d6e82787e222dbede3416a4c8b2dd9f1 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Mon, 12 Jul 2021 17:05:42 +0900 Subject: [PATCH 06/16] [MVQA] Change mv_infer_test order Change-Id: I5a156f2192ec59fdf276413843af06852d47c2e4 Signed-off-by: Kwang Son --- script/mvqa/db.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/script/mvqa/db.py b/script/mvqa/db.py index 9190239..ba0b737 100644 --- a/script/mvqa/db.py +++ b/script/mvqa/db.py @@ -133,7 +133,12 @@ class Session: pass def verify(self, result, label): - pass + rv = result.decode('utf-8') + trunk = rv[rv.find('callback:'):] + time_target = 'time : ' + task_time = trunk[trunk.find( + time_target) + len(time_target): trunk.find('(ms)')] + return (int(task_time), trunk[:trunk.find('(ms)')]) def get_db_meta(self): for ele in self.nas_config: @@ -222,7 +227,7 @@ class FaceCascadeDetection(Session): class FaceDetectionTFlite(FaceCascadeDetection): def run(self, item): file_path = os.path.basename(item) - command = '3\n2\n1\n5\n1\n6\n' + \ + command = '3\n5\n1\n7\n1\n8\n' + \ os.path.join(self.work_dir, file_path) + '\n2\n2\n' subp = subprocess.run(['sdb', 'shell', 'mv_infer_test_suite'], stdout=subprocess.PIPE, input=command.encode('UTF-8')) @@ -234,7 +239,7 @@ class FaceDetectionTFlite(FaceCascadeDetection): time_target = 'time : ' task_time = trunk[trunk.find( time_target) + len(time_target): trunk.find('(ms)')] - return (task_time, trunk[:trunk.find('(ms)')]) + return (int(task_time), trunk[:trunk.find('(ms)')]) class ImageClassification(Session): @@ -255,14 +260,6 @@ class ImageClassification(Session): stdout=subprocess.PIPE, input=command.encode('UTF-8')) return subp.stdout - def verify(self, result, label): 
- rv = result.decode('utf-8') - trunk = rv[rv.find('callback:'):] - time_target = 'time : ' - task_time = trunk[trunk.find( - time_target) + len(time_target): trunk.find('(ms)')] - return (task_time, trunk[:trunk.find('(ms)')]) - def clean(self, item): file_path = os.path.basename(item) os.system('sdb shell rm ' + os.path.join(self.work_dir, file_path)) @@ -287,14 +284,6 @@ class ObjectDetection(Session): stdout=subprocess.PIPE, input=command.encode('UTF-8')) return subp.stdout - def verify(self, result, label): - rv = result.decode('utf-8') - trunk = rv[rv.find('callback:'):] - time_target = 'time : ' - task_time = trunk[trunk.find( - time_target) + len(time_target): trunk.find('(ms)')] - return (task_time, trunk[:trunk.find('(ms)')]) - def clean(self, item): file_path = os.path.basename(item) os.system('sdb shell rm ' + os.path.join(self.work_dir, file_path)) -- 2.7.4 From f9aae3fdfa3004e65f3af9d4b236f76989cc28b6 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 14 Jul 2021 11:52:12 +0900 Subject: [PATCH 07/16] Add coordinate to Landmark outputmetadata Coordinate outputmetadata of Landmark indicates that an model's landmark coordinate is based on ratio between 0 ~ 1 or pixel. According to it, internal processes are applied to get proper landmark results. 
Change-Id: I5e24b34cf6ca2c9e9404f082382fb7fc3d662e7f Signed-off-by: Tae-Young Chung --- meta-template/pld_cpm_192x192.json | 1 + meta-template/pld_mobilenet_v1_posenet_multi_257x257.json | 1 + .../mv_inference/inference/include/OutputMetadata.h | 2 ++ mv_machine_learning/mv_inference/inference/src/Inference.cpp | 9 +++++++-- .../mv_inference/inference/src/OutputMetadata.cpp | 8 ++++++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json index 6a81e50..e4f573b 100644 --- a/meta-template/pld_cpm_192x192.json +++ b/meta-template/pld_cpm_192x192.json @@ -37,6 +37,7 @@ "name" : "Convolutional_Pose_Machine/stage_5_out", "index" : [-1, 1, 1, 1], "landmark_type" : 0, + "landmark_coordinate" : 1, "decoding_type" : 1, "decoding_info" : { diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json index 93084ce..1d69c7a 100644 --- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -37,6 +37,7 @@ "name" : "MobilenetV1/heatmap_2/BiasAdd", "index" : [-1, 1, 1, 1], "landmark_type" : 1, + "landmark_coordinate" : 1, "decoding_type" : 2, "decoding_info" : { diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 45c6311..8a6973e 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -283,6 +283,7 @@ namespace inference DimInfo dimInfo; int type; /**< 0: 2d-single, 1: 2d-multi, 2: 3-single */ int offset; + int coordinate; /**< 0: ratio, 1: pixel */ int decodingType; /**< 0: decoding unnecessary, 1: decoding heatmap, 2: decoding heatmap with additional refine data */ @@ -295,6 +296,7 @@ namespace inference DimInfo GetDimInfo() { return dimInfo; } int 
GetType(); int GetOffset(); + int GetCoordinate(); int GetDecodingType(); DecodeInfo& GetDecodingInfo(); diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 94582b3..3a2d20f 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1744,9 +1744,14 @@ namespace inference return ret; } - float inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); - float inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + float inputW = 1.f; + float inputH = 1.f; float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + if (landmarkInfo.GetCoordinate() == 1) { + inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + } + poseDecoder.decode(inputW, inputH, thresRadius); int part = 0; diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 6c396f5..aab8b50 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -691,6 +691,9 @@ namespace inference offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); LOGI("landmark offset: %d", offset); + coordinate = static_cast(json_object_get_int_member(pObject, "landmark_coordinate")); + LOGI("landmark coordinate: %d", coordinate); + decodingType = static_cast(json_object_get_int_member(pObject, "decoding_type")); LOGI("landmark decodeing type: %d", decodingType); } @@ -709,6 +712,11 @@ namespace inference return offset; } + int Landmark::GetCoordinate() + { + return coordinate; + } + int Landmark::GetDecodingType() { return 
decodingType; -- 2.7.4 From a96f91674da4852ca81dbccf5a6d9460dd92e746 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 14 Jul 2021 16:01:34 +0900 Subject: [PATCH 08/16] Add facial landmark detection inference with outputmetadata Change-Id: I03deac554f22ec9fe079b38d9562fd667b854495 Signed-off-by: Tae-Young Chung --- .../mv_inference/inference/include/Inference.h | 3 +- .../mv_inference/inference/src/Inference.cpp | 112 ++++++++++++++++----- .../mv_inference/inference/src/PoseDecoder.cpp | 18 ++-- .../inference/src/mv_inference_open.cpp | 15 ++- 4 files changed, 112 insertions(+), 36 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index 3fea65d..030f9ec 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -310,7 +310,8 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results); + int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results, + int width, int height); /** * @brief Gets the PoseLandmarkDetectionResults diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 3a2d20f..c7ca663 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1635,41 +1635,101 @@ namespace inference } int Inference::GetFacialLandMarkDetectionResults( - FacialLandMarkDetectionResults *detectionResults) + FacialLandMarkDetectionResults *detectionResults, int width, int height) { - tensor_t outputData; + LOGI("ENTER"); + FacialLandMarkDetectionResults results; + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + auto& 
landmarkInfo = outputMeta.GetLandmark(); + auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || + !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + LOGE("output buffers named of %s or %s are NULL", + landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } + int heatMapWidth = 0; + int heatMapHeight = 0; + int heatMapChannel = 0; + if (landmarkInfo.GetDecodingType() != 0) { + heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; + heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; + heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + } + + int number_of_landmarks = 0; + std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + if (landmarkInfo.GetDecodingType() == 0) { + LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); + number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] + / landmarkInfo.GetOffset(); + } else { + number_of_landmarks = heatMapChannel; + } + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); + + // decoding + PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta, + heatMapWidth, heatMapHeight, heatMapChannel, + number_of_landmarks); + // initialize decorder queue with landmarks to be decoded. 
+ int ret = poseDecoder.init(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init poseDecoder"); + return ret; + } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + float inputW = 1.f; + float inputH = 1.f; + if (landmarkInfo.GetCoordinate() == 1) { + inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + } + float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + poseDecoder.decode(inputW, inputH, thresRadius); - long number_of_detections = inferDimInfo[0][1]; - float *loc = reinterpret_cast(inferResults[0]); + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { + results.locations.push_back( + cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast(mSourceSize.width), + poseDecoder.getPointY(0, landmarkIndex) * static_cast(mSourceSize.height))); + } + results.number_of_landmarks = results.locations.size(); + *detectionResults = results; + } else { + tensor_t outputData; - FacialLandMarkDetectionResults results; - results.number_of_landmarks = 0; + // Get inference result and contain it to outputData. 
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); - cv::Point point(0, 0); - results.number_of_landmarks = 0; - LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (int idx = 0; idx < number_of_detections; idx += 2) { - point.x = static_cast(loc[idx] * mSourceSize.width); - point.y = static_cast(loc[idx + 1] * mSourceSize.height); + long number_of_detections = inferDimInfo[0][1]; + float *loc = reinterpret_cast(inferResults[0]); - results.locations.push_back(point); - results.number_of_landmarks++; + results.number_of_landmarks = 0; - LOGI("x:%d, y:%d", point.x, point.y); - } + cv::Point point(0, 0); + LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); + for (int idx = 0; idx < number_of_detections; idx += 2) { + point.x = static_cast(loc[idx] * mSourceSize.width); + point.y = static_cast(loc[idx + 1] * mSourceSize.height); + + results.locations.push_back(point); + results.number_of_landmarks++; - *detectionResults = results; + LOGI("x:%d, y:%d", point.x, point.y); + } + + *detectionResults = results; + } LOGE("Inference: FacialLandmarkDetectionResults: %d\n", results.number_of_landmarks); return MEDIA_VISION_ERROR_NONE; diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 271f068..cce5143 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -318,14 +318,16 @@ namespace inference } else { // multi pose is not supported std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); - float poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); - if (scoreInfo.GetType() == 1) { - poseScore = 
PostProcess::sigmoid(poseScore); - } - if (poseScore < scoreInfo.GetThresHold()) { - LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold()); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; + float poseScore = scoreInfo.GetThresHold(); + if (!scoreIndexes.empty()) { + poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); + if (scoreInfo.GetType() == 1) { + poseScore = PostProcess::sigmoid(poseScore); + } + if (poseScore < scoreInfo.GetThresHold()) { + LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, scoreInfo.GetThresHold()); + return MEDIA_VISION_ERROR_NONE; + } } int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3; diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index 1c4eb7e..c6bb99a 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -771,6 +771,19 @@ int mv_inference_facial_landmark_detect_open( std::vector sources; std::vector rects; + unsigned int width, height; + ret = mv_source_get_width(source, &width); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get width"); + return ret; + } + + ret = mv_source_get_height(source, &height); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get height"); + return ret; + } + sources.push_back(source); if (roi != NULL) @@ -784,7 +797,7 @@ int mv_inference_facial_landmark_detect_open( FacialLandMarkDetectionResults facialLandMarkDetectionResults; ret = pInfer->GetFacialLandMarkDetectionResults( - &facialLandMarkDetectionResults); + &facialLandMarkDetectionResults, width, height); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to get inference results"); return ret; -- 2.7.4 From b4875a1fdb0fd25c9ef43fd498199a87a9feeb96 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 14 Jul 
2021 16:04:28 +0900 Subject: [PATCH 09/16] Update testsuite to run facial landmark models with meta files Change-Id: Id3b8c985a7dcdc55f3e034107f7e4e9f5cd3760b Signed-off-by: Tae-Young Chung --- meta-template/fld_mediapipe_192x192.json | 46 ++++++++++ meta-template/fld_tweakcnn_128x128.json | 46 ++++++++++ .../inference/inference_test_suite.c | 99 +++++++++++++++++++++- 3 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 meta-template/fld_mediapipe_192x192.json create mode 100644 meta-template/fld_tweakcnn_128x128.json diff --git a/meta-template/fld_mediapipe_192x192.json b/meta-template/fld_mediapipe_192x192.json new file mode 100644 index 0000000..562fb77 --- /dev/null +++ b/meta-template/fld_mediapipe_192x192.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input", + "shape_type" : 1, + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [127.5, 127.5, 127.5], + "std" : [127.5, 127.5, 127.5] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "conv2d_30", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.3, + "score_type" : 1 + } + ], + "landmark" : [ + { + "name" : "conv2d_20", + "index" : [-1, -1, -1, 1], + "landmark_type" : 0, + "landmark_coordinate" : 1, + "decoding_type" : 0, + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/fld_tweakcnn_128x128.json b/meta-template/fld_tweakcnn_128x128.json new file mode 100644 index 0000000..7d8b48f --- /dev/null +++ b/meta-template/fld_tweakcnn_128x128.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "Placeholder", + "shape_type" : 1, + "shape_dims" : [ 1, 128, 128, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ 
+ { + "name" : "fanet8ss_inference/fully_connected_1/Sigmoid", + "index" : [-1, -1, -1, -1], + "top_number" : 1, + "threshold" : 0.0, + "score_type" : 0 + } + ], + "landmark" : [ + { + "name" : "fanet8ss_inference/fully_connected_1/Sigmoid", + "index" : [-1, 1], + "landmark_type" : 0, + "landmark_coordinate" : 0, + "landmark_offset" : 2, + "decoding_type" : 0 + } + ] + } +} diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 579e9f1..b838945 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -161,6 +161,14 @@ #define FLD_OPENCV_CONFIG_CAFFE_PATH \ "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" +/* + * Hosted models + */ +#define FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH \ + "/usr/share/capi-media-vision/models/FLD/tflite/fld_tweakcnn_128x128.tflite" +#define FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH \ + "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.tflite" + //Pose Detection #define PLD_TFLITE_WEIGHT_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld-tflite-001.tflite" @@ -2917,15 +2925,80 @@ int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg) return err; } + +int perform_hosted_tflite_tweakCNN_128_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH); + + mv_engine_config_set_int_attribute(handle, 
MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_mediapipe_192_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + int perform_facial_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6 }; + const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; const char *names[] = { "Configuration", "Tflite(CPU) + TweakCNN", "OPENCV(CPU) + TweakCNN", + "Hosted: TFLite(TweakCNN)", + "Hosted: TFLite(MediaPipe)", "Prepare", "Run", "Back" }; @@ -2969,6 +3042,26 @@ int perform_facial_landmark_detection() err = perform_opencv_cnncascade(&engine_cfg); } break; case 4: { + //perform Hosted TweakCNN config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_tweakCNN_128_config(&engine_cfg); + } break; + case 5: { + //perform Hosted MediaPipe config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if 
(err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_mediapipe_192_config(&engine_cfg); + } break; + case 6: { // create - configure - prepare if (infer) { int err2 = mv_inference_destroy(infer); @@ -2999,7 +3092,7 @@ int perform_facial_landmark_detection() break; } } break; - case 5: { + case 7: { if (mvSource) { int err2 = mv_destroy_source(mvSource); if (err2 != MEDIA_VISION_ERROR_NONE) @@ -3046,7 +3139,7 @@ int perform_facial_landmark_detection() unsigned long timeDiff = gettotalmillisec(diffspec); printf("elapsed time : %lu(ms)\n", timeDiff); } break; - case 6: { + case 8: { //perform destroy if (engine_cfg) { err = mv_destroy_engine_config(engine_cfg); -- 2.7.4 From c946ec318ae4910a5cd8f62de8f4efac3a773484 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Fri, 16 Jul 2021 12:38:03 +0900 Subject: [PATCH 10/16] Remove unnecessary variables copying outputData, instead use outputData itself Change-Id: I03f67afaaa83042812de7323f9b90714ba37f80a Signed-off-by: Tae-Young Chung --- .../mv_inference/inference/src/Inference.cpp | 103 +++++++-------------- 1 file changed, 36 insertions(+), 67 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index c7ca663..5ff3235 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1243,13 +1243,9 @@ namespace inference top_result_pq; float value = 0.0f; - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - - int count = inferDimInfo[0][1]; + int count = outputData.dimInfo[0][1]; LOGI("count: %d", count); - float *prediction = reinterpret_cast(inferResults[0]); + float *prediction = reinterpret_cast(outputData.data[0]); for (int i = 0; i < count; ++i) { value = prediction[i]; @@ 
-1270,20 +1266,17 @@ namespace inference } std::reverse(top_results.begin(), top_results.end()); - int classIdx = -1; ImageClassificationResults results; results.number_of_classes = 0; - for (size_t idx = 0; idx < top_results.size(); ++idx) { - if (top_results[idx].first < mThreshold) + for (auto& result : top_results) { + if (result.first < mThreshold) continue; - LOGI("idx:%zu", idx); - LOGI("classIdx: %d", top_results[idx].second); - LOGI("classProb: %f", top_results[idx].first); - - classIdx = top_results[idx].second; - results.indices.push_back(classIdx); - results.confidences.push_back(top_results[idx].first); - results.names.push_back(mUserListName[classIdx]); + + LOGI("class Idx: %d, Prob: %.4f", result.second, result.first); + + results.indices.push_back(result.second); + results.confidences.push_back(result.first); + results.names.push_back(mUserListName[result.second]); results.number_of_classes++; } @@ -1372,12 +1365,6 @@ namespace inference // In case of object detection, // a model may apply post-process but others may not. // Thus, those cases should be hanlded separately. 
- std::vector > inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); float *boxes = nullptr; float *classes = nullptr; @@ -1395,7 +1382,7 @@ namespace inference number_of_detections = static_cast( *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], + cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]); // boxes @@ -1418,11 +1405,11 @@ namespace inference scores = cvScores.ptr(0); } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); + boxes = reinterpret_cast(outputData.data[0]); + classes = reinterpret_cast(outputData.data[1]); + scores = reinterpret_cast(outputData.data[2]); number_of_detections = - (int) (*reinterpret_cast(inferResults[3])); + (int) (*reinterpret_cast(outputData.data[3])); } LOGI("number_of_detections = %d", number_of_detections); @@ -1542,12 +1529,6 @@ namespace inference // In case of object detection, // a model may apply post-process but others may not. // Thus, those cases should be handled separately. 
- std::vector > inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); float *boxes = nullptr; float *classes = nullptr; @@ -1565,7 +1546,7 @@ namespace inference number_of_detections = static_cast( *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], + cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]); // boxes @@ -1588,11 +1569,11 @@ namespace inference scores = cvScores.ptr(0); } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); + boxes = reinterpret_cast(outputData.data[0]); + classes = reinterpret_cast(outputData.data[1]); + scores = reinterpret_cast(outputData.data[2]); number_of_detections = static_cast( - *reinterpret_cast(inferResults[3])); + *reinterpret_cast(outputData.data[3])); } int left, top, right, bottom; @@ -1707,30 +1688,22 @@ namespace inference return ret; } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + int number_of_detections = outputData.dimInfo[0][1] >> 1; + float *loc = reinterpret_cast(outputData.data[0]); + results.number_of_landmarks = number_of_detections; + results.locations.resize(number_of_detections); - long number_of_detections = inferDimInfo[0][1]; - float *loc = reinterpret_cast(inferResults[0]); - - results.number_of_landmarks = 0; - - cv::Point point(0, 0); LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (int idx = 0; idx < number_of_detections; idx += 2) { - point.x = static_cast(loc[idx] * mSourceSize.width); - point.y = static_cast(loc[idx + 1] * mSourceSize.height); - - results.locations.push_back(point); - results.number_of_landmarks++; + for (auto& point 
: results.locations) { + point.x = static_cast(*loc++ * mSourceSize.width); + point.y = static_cast(*loc++ * mSourceSize.height); LOGI("x:%d, y:%d", point.x, point.y); } *detectionResults = results; } - LOGE("Inference: FacialLandmarkDetectionResults: %d\n", + LOGI("Inference: FacialLandmarkDetectionResults: %d\n", results.number_of_landmarks); return MEDIA_VISION_ERROR_NONE; } @@ -1848,28 +1821,24 @@ namespace inference return ret; } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - long number_of_poses = 1; - long number_of_landmarks = inferDimInfo[0][3]; - float *tmp = static_cast(inferResults[0]); - cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + long number_of_landmarks = outputData.dimInfo[0][3]; + float *tmp = static_cast(outputData.data[0]); + cv::Size heatMapSize(outputData.dimInfo[0][1], outputData.dimInfo[0][2]); cv::Point loc; cv::Point2f loc2f; double score; cv::Mat blurredHeatMap; - cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), - CV_32FC(inferDimInfo[0][3]), (void *) tmp); + cv::Mat reShapeTest(cv::Size(outputData.dimInfo[0][2], outputData.dimInfo[0][1]), + CV_32FC(outputData.dimInfo[0][3]), (void *) tmp); - cv::Mat multiChannels[inferDimInfo[0][3]]; + cv::Mat multiChannels[outputData.dimInfo[0][3]]; split(reShapeTest, multiChannels); - float ratioX = static_cast(inferDimInfo[0][2]); - float ratioY = static_cast(inferDimInfo[0][1]); + float ratioX = static_cast(outputData.dimInfo[0][2]); + float ratioY = static_cast(outputData.dimInfo[0][1]); if (mPoseResult == NULL) { if(!mUserListName.empty()) { -- 2.7.4 From be33524781565d84e13f56c04fd777384d9c350a Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 15 Jul 2021 17:39:20 +0900 Subject: [PATCH 11/16] test: Add show_menu helper functions show_menu -> show_menu_linear: clean manual numbering show_menu_yes_or_no: clean name_last Change-Id: Ib2b1579cf16351a2d1299a6285b7f6e9a0aed540 
Signed-off-by: Kwang Son --- .../common/testsuite_common/mv_testsuite_common.c | 29 ++++ .../common/testsuite_common/mv_testsuite_common.h | 7 ++ .../inference/inference_test_suite.c | 108 +++++---------------- 3 files changed, 59 insertions(+), 85 deletions(-) diff --git a/test/testsuites/common/testsuite_common/mv_testsuite_common.c b/test/testsuites/common/testsuite_common/mv_testsuite_common.c index d368ef8..91f5bc1 100644 --- a/test/testsuites/common/testsuite_common/mv_testsuite_common.c +++ b/test/testsuites/common/testsuite_common/mv_testsuite_common.c @@ -272,3 +272,32 @@ int load_mv_source_from_file( return err; } + +int show_menu_linear(const char *title, const char **menu, size_t len_menu) +{ + printf("*********************************************\n"); + printf("* %38s *\n", title); + printf("*-------------------------------------------*\n"); + + for (size_t i = 0; i < len_menu; ++i) + printf("* %2zu. %34s *\n", i + 1, menu[i]); + + printf("*********************************************\n\n"); + int selection = 0; + printf("Your choice: "); + if (scanf("%20i", &selection) == 0) { + if (scanf("%*[^\n]%*c") != 0) { + printf("ERROR: Reading the input line error.\n"); + return -1; + } + printf("ERROR: Incorrect input.\n"); + } + + return selection; +} + +int show_menu_yes_or_no(const char *title) +{ + const char *names_last[] = { "Yes", "No" }; + return show_menu_linear(title, names_last, 2); +} \ No newline at end of file diff --git a/test/testsuites/common/testsuite_common/mv_testsuite_common.h b/test/testsuites/common/testsuite_common/mv_testsuite_common.h index 1a5c2cd..66876e0 100644 --- a/test/testsuites/common/testsuite_common/mv_testsuite_common.h +++ b/test/testsuites/common/testsuite_common/mv_testsuite_common.h @@ -142,6 +142,13 @@ int show_menu( const char **names, int number_of_option); +int show_menu_linear( + const char *title, + const char **menu, + size_t len_menu); + +int show_menu_yes_or_no(const char *title); + /** * @brief Loads media 
source from JPEG image. * diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index b838945..24e1f03 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -328,30 +328,6 @@ void _image_classified_cb(mv_source_h source, const int number_of_classes, } } -int show_menu(const char *title, const int *options, const char **names, - int cnt) -{ - printf("*********************************************\n"); - printf("* %38s *\n", title); - printf("*-------------------------------------------*\n"); - int i = 0; - for (i = 0; i < cnt; ++i) - printf("* %2i. %34s *\n", options[i], names[i]); - - printf("*********************************************\n\n"); - int selection = 0; - printf("Your choice: "); - if (scanf("%20i", &selection) == 0) { - if (scanf("%*[^\n]%*c") != 0) { - printf("ERROR: Reading the input line error.\n"); - return -1; - } - printf("ERROR: Incorrect input.\n"); - } - - return selection; -} - int perform_configure_set_model_config_path(mv_engine_config_h engine_cfg) { int err = MEDIA_VISION_ERROR_NONE; @@ -576,7 +552,6 @@ int perform_configuration(mv_engine_config_h *engine_cfg) int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; const char *names[] = { "Set Model Configuration", "Set Model Weights", "Set Model Data Type", @@ -605,8 +580,7 @@ int perform_configuration(mv_engine_config_h *engine_cfg) } while (sel_opt == 0) { - sel_opt = show_menu("Select Actions: ", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Actions: ", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: err = perform_configure_set_model_config_path(handle); @@ -1371,9 +1345,6 @@ int perform_image_classification() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 
3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19 }; const char *names[] = { "Configuration", "TFLite(cpu + Mobilenet)", "OpenCV(cpu + Squeezenet)", @@ -1399,8 +1370,7 @@ int perform_image_classification() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -1680,13 +1650,8 @@ int perform_image_classification() } sel_opt = 0; - const int options_last[2] = { 1, 2 }; - const char *names_last[2] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = - show_menu("Run Image Classification again?: ", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Image Classification again?: "); switch (sel_opt) { case 1: do_another = 1; @@ -2049,7 +2014,6 @@ int perform_object_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char *names[] = { "Configuration", "TFLITE(CPU) + MobileNetV1+SSD", "OPENCV(CPU) + MobileNetV1+SSD", @@ -2066,8 +2030,7 @@ int perform_object_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -2255,12 +2218,8 @@ int perform_object_detection() } sel_opt = 0; - const int options_last[2] = { 1, 2 }; - const char *names_last[2] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu("Run Object Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Object Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -2563,7 +2522,6 @@ int perform_face_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] 
= { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; const char *names[] = { "Configuration", "TFLite(CPU) + MobileNetV1 + SSD", "OPENCV(CPU) + Resnet10 + SSD", @@ -2579,8 +2537,8 @@ int perform_face_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = + show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -2759,12 +2717,8 @@ int perform_face_detection() } sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu("Run Face Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Face Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -2993,7 +2947,6 @@ int perform_facial_landmark_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; const char *names[] = { "Configuration", "Tflite(CPU) + TweakCNN", "OPENCV(CPU) + TweakCNN", @@ -3008,8 +2961,7 @@ int perform_facial_landmark_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -3174,13 +3126,8 @@ int perform_facial_landmark_detection() } sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu( - "Run Facial Landmark Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Facial Landmark Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -3360,7 +3307,6 @@ int perform_pose_landmark_detection() int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 
5, 6, 7 }; const char *names[] = { "Configuration", "TFLITE(CPU) + CPM", "Hosted: TFLITE(CPU) + CPM", @@ -3374,8 +3320,7 @@ int perform_pose_landmark_detection() mv_source_h mvSource = NULL; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { //perform configuration @@ -3531,13 +3476,8 @@ int perform_pose_landmark_detection() } sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = show_menu( - "Run Pose Landmark Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); + sel_opt = show_menu_yes_or_no("Run Pose Landmark Detection again?:"); switch (sel_opt) { case 1: do_another = 1; @@ -3582,20 +3522,21 @@ int main() { int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6 }; - const char *names[] = { "Image Classification", "Object Detection", - "Face Detection", "Facial Landmark Detection", - "Pose Landmark Detection", "Exit" }; + const char *names[] = { + "Image Classification", "Object Detection", + "Face Detection", "Facial Landmark Detection", + "Pose Landmark Detection", "Exit" + }; int err = MEDIA_VISION_ERROR_NONE; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); + sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); switch (sel_opt) { case 1: { err = perform_image_classification(); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform image classification. ERROR[0x%x]\n", err); + printf("Fail to perform image classification. ERROR[0x%x]\n", + err); } } break; case 2: { @@ -3613,7 +3554,8 @@ int main() case 4: { err = perform_facial_landmark_detection(); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", err); + printf("Fail to perform facial landmark detection. 
ERROR[0x%x]\n", + err); } } break; case 5: { @@ -3634,12 +3576,8 @@ int main() int do_another = 0; sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - while (sel_opt == 0) { - sel_opt = - show_menu("Another action?: ", options_last, names_last, 2); + sel_opt = show_menu_yes_or_no("Another action?: "); switch (sel_opt) { case 1: do_another = 1; -- 2.7.4 From 88f1df36b481aa8ef7878062bc3a008e026d6064 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Thu, 22 Jul 2021 22:19:11 -0400 Subject: [PATCH 12/16] test: Add movenet for pose detection Change-Id: Iba6c9fe101b0d3f0976aac41fc94ebd332d218e6 Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 52 +++++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 24e1f03..50b19eb 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -190,6 +190,10 @@ #define PLD_LABEL_MOBILENET_V1_POSENET_257_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld_mobilenet_v1_posenet_multi_label.txt" +// https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4 +#define PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_int8_movenet.tflite" + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -3302,6 +3306,39 @@ int perform_hosted_tflite_mobilenet_v1_posenet_257_config(mv_engine_config_h *en return err; } +int perform_hosted_tflite_int8_movenet(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + int perform_pose_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; @@ -3311,6 +3348,7 @@ "TFLITE(CPU) + CPM", "Hosted: TFLITE(CPU) + CPM", "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", + "Hosted: TFLITE(CPU) + INT8_MOVENET", "Prepare", "Run", "Back" }; @@ -3364,6 +3402,16 @@ int perform_pose_landmark_detection() err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); } break; case 5: { + //perform int8 movenet + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + err = perform_hosted_tflite_int8_movenet(&engine_cfg); + } break; + case 6: { // create - configure - prepare if (infer) { int err2 = mv_inference_destroy(infer); @@ -3394,7 +3442,7 @@ int perform_pose_landmark_detection() break; } } break; - case 6: { + case 7: { if (mvSource) { int err2 = mv_destroy_source(mvSource); if 
(err2 != MEDIA_VISION_ERROR_NONE) @@ -3441,7 +3489,7 @@ int perform_pose_landmark_detection() unsigned long timeDiff = gettotalmillisec(diffspec); printf("elapsed time : %lu(ms)\n", timeDiff); } break; - case 7: { + case 8: { //perform destroy if (engine_cfg) { err = mv_destroy_engine_config(engine_cfg); -- 2.7.4 From ae8c51996a086998145617eba1f3ac6deebfb165 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Sun, 1 Aug 2021 21:28:41 -0400 Subject: [PATCH 13/16] test: Remove test while loop While loop keep previous test context which contains allocated resource. This resource check only increase code not usability. Change-Id: I1f2bfdd17fd6305d313d98362c4549b82f214d62 Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 105 ++++++--------------- 1 file changed, 30 insertions(+), 75 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 50b19eb..5053cea 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -3568,79 +3568,34 @@ int perform_pose_landmark_detection() int main() { - int sel_opt = 0; - - const char *names[] = { - "Image Classification", "Object Detection", - "Face Detection", "Facial Landmark Detection", - "Pose Landmark Detection", "Exit" - }; - int err = MEDIA_VISION_ERROR_NONE; - while (sel_opt == 0) { - sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - err = perform_image_classification(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform image classification. ERROR[0x%x]\n", - err); - } - } break; - case 2: { - err = perform_object_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform object detection. 
ERROR[0x%x]\n", err); - } - } break; - case 3: { - err = perform_face_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform face detection. ERROR[0x%x]\n", err); - } - } break; - case 4: { - err = perform_facial_landmark_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", - err); - } - } break; - case 5: { - err = perform_pose_landmark_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform pose landmark detection"); - } - } break; - case 6: { - printf("Exit"); - } break; - default: - printf("Invalid option"); - sel_opt = 0; - continue; - } - - int do_another = 0; - - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Another action?: "); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 0 : 1; - } - - return 0; -} + const char *names[] = { "Image Classification", "Object Detection", + "Face Detection", "Facial Landmark Detection", + "Pose Landmark Detection" }; + + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; + } + switch (sel_opt) { + case 1: { + err = perform_image_classification(); + } break; + case 2: { + err = perform_object_detection(); + } break; + case 3: { + err = perform_face_detection(); + } break; + case 4: { + err = perform_facial_landmark_detection(); + } break; + case 5: { + err = perform_pose_landmark_detection(); + } break; + } + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to perform task. 
ERROR[0x%x]\n", err); return err; } \ No newline at end of file -- 2.7.4 From 0a7ab1f50e641c4937d2ca87ab0950067688860b Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Mon, 2 Aug 2021 03:37:46 -0400 Subject: [PATCH 14/16] test: Separate resource allocation perform_pose_landmark_detection allocates mv_source, mv_engine, and mv_infer in a single function, which makes it hard to deallocate the resources and handle errors. Change-Id: I80df947730b734e9cb4f95bc238d66cccbe0ca7d Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 559 ++++++++------------- 1 file changed, 212 insertions(+), 347 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 5053cea..e2df147 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -194,6 +194,12 @@ #define PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld_int8_movenet.tflite" +#define TASK_IC 0 +#define TASK_OD 1 +#define TASK_FD 2 +#define TASK_FLD 3 +#define TASK_PLD 4 + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -332,6 +338,113 @@ void _image_classified_cb(mv_source_h source, const int number_of_classes, } } +int infer_task_with_img(char *img_file_name, mv_inference_h infer, int task_id) +{ + mv_source_h mvSource = NULL; + struct timespec s_tspec; + struct timespec e_tspec; + + int err = mv_create_source(&mvSource); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create mvSource.\n"); + return err; + } + + err = load_mv_source_from_file(img_file_name, mvSource); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to load mvSource err: %d.\n", err); + return err; + } + + clock_gettime(CLOCK_MONOTONIC, &s_tspec); + + switch (task_id) { + case TASK_IC: + err = mv_inference_image_classify(mvSource, infer, NULL, + _image_classified_cb, NULL); + break; + case TASK_OD: + err = mv_inference_object_detect(mvSource, infer, _object_detected_cb, + NULL); + break; + case TASK_FD: + err = mv_inference_face_detect(mvSource, infer, _face_detected_cb, + NULL); + break; + case TASK_FLD: + err = mv_inference_facial_landmark_detect( + mvSource, infer, NULL, _facial_landmark_detected_cb, NULL); + break; + case TASK_PLD: + err = mv_inference_pose_landmark_detect( + mvSource, infer, NULL, _pose_landmark_detected_cb, NULL); + break; + default: + err = MEDIA_VISION_ERROR_INVALID_PARAMETER; + break; + } + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to infer task [err:%i]\n", err); + + clock_gettime(CLOCK_MONOTONIC, &e_tspec); + struct timespec diffspec = diff(s_tspec, e_tspec); + unsigned long timeDiff = gettotalmillisec(diffspec); + printf("elapsed time : %lu(ms)\n", timeDiff); + + err = mv_destroy_source(mvSource); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy mvSource [err:%i]\n", err); + + return err; +} + +int infer_task(mv_inference_h infer, int task_id) +{ + char *in_file_name = NULL; + /* Load media source */ + while (input_string("Input file name to be inferred:", 1024, + &(in_file_name)) == -1) + printf("Incorrect input! 
Try again.\n"); + + int err = infer_task_with_img(in_file_name, infer, task_id); + free(in_file_name); + return err; +} + +int mv_inference_task_helper(mv_engine_config_h engine_cfg, int task_id) +{ + mv_inference_h infer = NULL; + + int err = mv_inference_create(&infer); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create inference handle [err:%i]\n", err); + return err; + } + + err = mv_inference_configure(infer, engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to configure inference handle [err:%i]\n", err); + goto clean_mv_inference; + } + + err = mv_inference_prepare(infer); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to prepare inference handle"); + goto clean_mv_inference; + } + + err = infer_task(infer, task_id); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to infer task"); + } + +clean_mv_inference: + err = mv_inference_destroy(infer); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy inference handle [err:%i]\n", err); + return err; +} + int perform_configure_set_model_config_path(mv_engine_config_h engine_cfg) { int err = MEDIA_VISION_ERROR_NONE; @@ -3172,398 +3285,150 @@ int perform_facial_landmark_detection() return MEDIA_VISION_ERROR_NONE; } -int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg) +int engine_config_hosted_tflite_cpu(mv_engine_config_h handle, + const char *tf_weight) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + int err = mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - const char *inputNodeName = "image"; - const char *outputNodeName[] = { 
"Convolutional_Pose_Machine/stage_5_out" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, PLD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 192); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 192); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_cpm_192_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - mv_engine_config_set_string_attribute( - 
handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - PLD_TFLITE_WEIGHT_CPM_192_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; + err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); return err; } -int perform_hosted_tflite_mobilenet_v1_posenet_257_config(mv_engine_config_h *engine_cfg) +int perform_armnn_cpm_config(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; + const char *inputNodeName = "image"; + const char *outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" }; - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + int err = engine_config_hosted_tflite_cpu(handle, PLD_TFLITE_WEIGHT_PATH); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } -/* - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - PLD_LABEL_MOBILENET_V1_POSENET_257_PATH); -*/ - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); + err = mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - 
MV_INFERENCE_TARGET_CPU); + err = mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - *engine_cfg = handle; - return err; -} + err = mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } -int perform_hosted_tflite_int8_movenet(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; + err = mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192); if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } return err; } - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); + err = mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); + err = mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName); + if (err != MEDIA_VISION_ERROR_NONE) { + return err; + } - *engine_cfg = handle; + err = mv_engine_config_set_array_string_attribute( + handle, 
MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); return err; } int perform_pose_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const char *names[] = { "Configuration", - "TFLITE(CPU) + CPM", - "Hosted: TFLITE(CPU) + CPM", - "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", - "Hosted: TFLITE(CPU) + INT8_MOVENET", - "Prepare", - "Run", - "Back" }; - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; + const char *names[] = { + "TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + CPM", + "Hosted: TFLITE(CPU) + MOBILENET_V1_POSENET", + "Hosted: TFLITE(CPU) + INT8_MOVENET", + }; - while (sel_opt == 0) { - sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_armnn_cpm_config(&engine_cfg); - } break; - case 3: { - //perform cpm config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_cpm_192_config(&engine_cfg); - } break; - case 4: { - //perform mobilenet-v1 posenet config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); - } break; - case 5: { - 
//perform int8 movenet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_mobilenet_v1_posenet_257_config(&engine_cfg); - } break; - case 6: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 7: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! 
Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource err: %d.\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_pose_landmark_detect( - mvSource, infer, NULL, _pose_landmark_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 8: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code: %i\n", err); - } - - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Run Pose Landmark Detection again?:"); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = 
(do_another == 1) ? 0 : 1; + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; } - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; + err = mv_create_engine_config(&engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + return err; } - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; + switch (sel_opt) { + case 1: { //perform TweakCNN config + err = perform_armnn_cpm_config(engine_cfg); + } break; + case 2: { //perform cpm config + err = engine_config_hosted_tflite_cpu(engine_cfg, + PLD_TFLITE_WEIGHT_CPM_192_PATH); + } break; + case 3: { //perform mobilenet-v1 posenet config + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + } break; + case 4: { //perform int8 movenet + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); + } break; } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to perform config [err:%i]\n", err); + goto clean_pose_engine; } - return MEDIA_VISION_ERROR_NONE; + err = mv_inference_task_helper(engine_cfg, TASK_PLD); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to detect with engine [err:%i]\n", err); + +clean_pose_engine: + err = mv_destroy_engine_config(engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err); + + return err; } int main() -- 2.7.4 From aa3185f2252bdebbfbf48cc3dab3d4c97f54aa7c Mon Sep 17 
00:00:00 2001 From: Kwang Son Date: Tue, 3 Aug 2021 21:42:19 -0400 Subject: [PATCH 15/16] test: Cleanup perform_facial_landmark_detection Based on commit 0a7ab1f50e641c4937d2ca87ab0950067688860b: allocating mv_source, mv_engine, and mv_infer in a single function makes it hard to deallocate resources and handle errors. Change-Id: Ib242154ec33602a8a9788203cabc34e37e5f878d Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 473 ++++----------------- 1 file changed, 90 insertions(+), 383 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index e2df147..5fa02f2 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -200,6 +200,15 @@ #define TASK_FLD 3 #define TASK_PLD 4 +#define RET_IF_FAIL(exp) \ + do { \ + int err = (exp); \ + if (err != MEDIA_VISION_ERROR_NONE) { \ + printf("[%s] %s failed\n", __func__, #exp); \ + return err; \ + } \ + } while (0) + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -2876,413 +2885,111 @@ int perform_face_detection() return MEDIA_VISION_ERROR_NONE; } -int perform_tflite_TweakCNN(mv_engine_config_h *engine_cfg) +int perform_tflite_TweakCNN(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - const char *inputNodeName = "INPUT_TENSOR_NAME"; const char *outputNodeName[] = { "OUTPUT_TENSOR_NAME" }; - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 128); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 128); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; + RET_IF_FAIL( + engine_config_hosted_tflite_cpu(handle, FLD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, 
MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); + return MEDIA_VISION_ERROR_NONE; } -int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg) +int perform_opencv_cnncascade(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - const char *inputNodeName = "data"; const char *outputNodeName[] = { "Sigmoid_fc2" }; - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_OPENCV_WEIGHT_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + 
RET_IF_FAIL(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - FLD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 128); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 128); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - - -int perform_hosted_tflite_tweakCNN_128_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mediapipe_192_config(mv_engine_config_h *engine_cfg) -{ - int 
err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; + FLD_OPENCV_CONFIG_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 128)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); + return MEDIA_VISION_ERROR_NONE; } int perform_facial_landmark_detection() { int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const char *names[] = { "Configuration", - "Tflite(CPU) + TweakCNN", - "OPENCV(CPU) + 
TweakCNN", - "Hosted: TFLite(TweakCNN)", - "Hosted: TFLite(MediaPipe)", - "Prepare", - "Run", - "Back" }; - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform SRID TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_tflite_TweakCNN(&engine_cfg); - } break; - case 3: { - //perform CNN cascade - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - err = perform_opencv_cnncascade(&engine_cfg); - } break; - case 4: { - //perform Hosted TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_tweakCNN_128_config(&engine_cfg); - } break; - case 5: { - //perform Hosted MediaPipe config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_hosted_tflite_mediapipe_192_config(&engine_cfg); - } break; - case 6: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle 
[err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 7: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. 
error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_facial_landmark_detect( - mvSource, infer, NULL, _facial_landmark_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 8: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } - - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Run Facial Landmark Detection again?:"); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } + const char *names[] = { + "Tflite(CPU) + TweakCNN", + "OPENCV(CPU) + TweakCNN", + "Hosted: TFLite(TweakCNN)", + "Hosted: TFLite(MediaPipe)", + }; - sel_opt = (do_another == 1) ? 
0 : 1; + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; } - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } + RET_IF_FAIL(mv_create_engine_config(&engine_cfg)); - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; + switch (sel_opt) { + case 1: { + err = perform_tflite_TweakCNN(engine_cfg); + } break; + case 2: { + err = perform_opencv_cnncascade(engine_cfg); + } break; + case 3: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FLD_TFLITE_WIEGHT_TWEAKCNN_128_PATH); + } break; + case 4: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FLD_TFLITE_WIEGHT_MEDIAPIPE_192_PATH); + } break; } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to perform config [err:%i]\n", err); + goto clean_facial_landmark_engine; } - return MEDIA_VISION_ERROR_NONE; + RET_IF_FAIL(mv_inference_task_helper(engine_cfg, TASK_FLD)); + +clean_facial_landmark_engine: + RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); + return err; } int engine_config_hosted_tflite_cpu(mv_engine_config_h handle, -- 2.7.4 From 9b045d0c27ad4dd8b5a51e90dd1bdefb90b1b4c1 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Wed, 4 Aug 2021 03:06:26 -0400 Subject: [PATCH 16/16] test: Cleanup perform_face_detection Based on commit 0a7ab1f50e641c4937d2ca87ab0950067688860b: allocating mv_source, mv_engine, and mv_infer in a single function makes it hard to deallocate resources and handle errors. 
Change-Id: Iaf1124821e39c7785955bc9b06a49b5997bb0430 Signed-off-by: Kwang Son --- .../inference/inference_test_suite.c | 741 +++++---------------- 1 file changed, 175 insertions(+), 566 deletions(-) diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 5fa02f2..b47547e 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -2386,503 +2386,159 @@ int perform_object_detection() return MEDIA_VISION_ERROR_NONE; } -int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg) +int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h handle) { - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "normalized_input_image_tensor"; - const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - 
mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_opencv_resnet10ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "data"; - const char *outputNodeName[] = { "detection_out" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - FD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 135.7); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - 
MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - const char *inputNodeName = "normalized_input_image_tensor"; const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - 
mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mobilenetv1ssd_300_config_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_blazeface_128_config_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - 
FD_TFLITE_WEIGHT_BLAZEFACE_128_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_face_detection() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const char *names[] = { "Configuration", - "TFLite(CPU) + MobileNetV1 + SSD", - "OPENCV(CPU) + Resnet10 + SSD", - "ARMNN(CPU) + MobileNetV1 + SSD", - "Hosted: TFLite(cpu + MobilenetV1+SSD)", - "Hosted: TFLite(cpu + BlazeFace)", - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = - show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform TFlit Mobilenetv1ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_tflite_mobilenetv1ssd_face(&engine_cfg); - } break; - case 3: { - //perform Opencv resenet10ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_opencv_resnet10ssd_face(&engine_cfg); - } break; - case 4: { - //perform Armnn Mobilenetv1ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err 
= perform_armnn_mobilenetv1ssd_face(&engine_cfg); - } break; - case 5: { - //perform hosted TFlite Mobilenetv1ssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_hosted_tflite_mobilenetv1ssd_300_config_face(&engine_cfg); - } break; - case 6: { - //perform hosted TFlite blazeface config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_hosted_tflite_blazeface_128_config_face(&engine_cfg); - } break; - case 7: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 8: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! 
Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_face_detect(mvSource, infer, _face_detected_cb, - NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); + RET_IF_FAIL(engine_config_hosted_tflite_cpu(handle, FD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4)); + return MEDIA_VISION_ERROR_NONE; +} - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 9: { - 
//perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } +int perform_opencv_resnet10ssd_face(mv_engine_config_h handle) +{ + const char *inputNodeName = "data"; + const char *outputNodeName[] = { "detection_out" }; - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FD_OPENCV_WEIGHT_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, + FD_OPENCV_CONFIG_CAFFE_PATH)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 135.7)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); 
+ return MEDIA_VISION_ERROR_NONE; +} - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } +int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h handle) +{ + const char *inputNodeName = "normalized_input_image_tensor"; + const char *outputNodeName[] = { "TFLite_Detection_PostProcess", + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_ARMNN)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, 
MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4)); + return MEDIA_VISION_ERROR_NONE; +} - sel_opt = 0; - while (sel_opt == 0) { - sel_opt = show_menu_yes_or_no("Run Face Detection again?:"); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } +int perform_face_detection() +{ + int err = MEDIA_VISION_ERROR_NONE; + mv_engine_config_h engine_cfg = NULL; + const char *names[] = { + "TFLite(CPU) + MobileNetV1 + SSD", + "OPENCV(CPU) + Resnet10 + SSD", + "ARMNN(CPU) + MobileNetV1 + SSD", + "Hosted: TFLite(cpu + MobilenetV1+SSD)", + "Hosted: TFLite(cpu + BlazeFace)", + }; - sel_opt = (do_another == 1) ? 0 : 1; + int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); + if (sel_opt <= 0 || sel_opt > ARRAY_SIZE(names)) { + printf("Invalid option"); + return -1; } - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } + RET_IF_FAIL(mv_create_engine_config(&engine_cfg)); - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; + switch (sel_opt) { + case 1: { + err = perform_tflite_mobilenetv1ssd_face(engine_cfg); + } break; + case 2: { + err = perform_opencv_resnet10ssd_face(engine_cfg); + } break; + case 3: { + err = perform_armnn_mobilenetv1ssd_face(engine_cfg); + } break; + case 4: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); + } break; + case 5: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, FD_TFLITE_WEIGHT_BLAZEFACE_128_PATH); + } break; } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; + if (err != 
MEDIA_VISION_ERROR_NONE) { + printf("Fail to perform config [err:%i]\n", err); + goto clean_face_engine; } - return MEDIA_VISION_ERROR_NONE; + err = mv_inference_task_helper(engine_cfg, TASK_FD); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to detect with engine [err:%i]\n", err); + +clean_face_engine: + RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); + return err; } int perform_tflite_TweakCNN(mv_engine_config_h handle) @@ -2985,7 +2641,9 @@ int perform_facial_landmark_detection() goto clean_facial_landmark_engine; } - RET_IF_FAIL(mv_inference_task_helper(engine_cfg, TASK_FLD)); + err = mv_inference_task_helper(engine_cfg, TASK_FLD); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to detect with engine [err:%i]\n", err); clean_facial_landmark_engine: RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); @@ -2995,21 +2653,13 @@ clean_facial_landmark_engine: int engine_config_hosted_tflite_cpu(mv_engine_config_h handle, const char *tf_weight) { - int err = mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - return err; + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU)); + return MEDIA_VISION_ERROR_NONE; } int perform_armnn_cpm_config(mv_engine_config_h handle) @@ -3017,68 +2667,29 @@ int perform_armnn_cpm_config(mv_engine_config_h handle) const char *inputNodeName = "image"; const char 
*outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" }; - int err = engine_config_hosted_tflite_cpu(handle, PLD_TFLITE_WEIGHT_PATH); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName); - if (err != MEDIA_VISION_ERROR_NONE) { - return err; - } - - err = mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - return err; + RET_IF_FAIL( + engine_config_hosted_tflite_cpu(handle, PLD_TFLITE_WEIGHT_PATH)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32)); + 
RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0)); + RET_IF_FAIL(mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192)); + RET_IF_FAIL(mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3)); + RET_IF_FAIL(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName)); + RET_IF_FAIL(mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1)); + return MEDIA_VISION_ERROR_NONE; } int perform_pose_landmark_detection() @@ -3105,18 +2716,18 @@ int perform_pose_landmark_detection() } switch (sel_opt) { - case 1: { //perform TweakCNN config + case 1: { err = perform_armnn_cpm_config(engine_cfg); } break; - case 2: { //perform cpm config + case 2: { err = engine_config_hosted_tflite_cpu(engine_cfg, PLD_TFLITE_WEIGHT_CPM_192_PATH); } break; - case 3: { //perform mobilenet-v1 posenet config + case 3: { err = engine_config_hosted_tflite_cpu( engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); } break; - case 4: { //perform int8 movenet + case 4: { err = engine_config_hosted_tflite_cpu( engine_cfg, PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH); } break; @@ -3131,9 +2742,7 @@ int perform_pose_landmark_detection() printf("Fail to detect with engine [err:%i]\n", err); clean_pose_engine: - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); + RET_IF_FAIL(mv_destroy_engine_config(engine_cfg)); 
return err; } -- 2.7.4