From: Tae-Young Chung
Date: Fri, 12 Jun 2020 01:24:49 +0000 (+0900)
Subject: [testsuite/stream_infer] Support hand gesture model and add stream_infer
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8fb53bd7ce9e0dcbb18c07b2c0182e3cc4a74fec;p=platform%2Fcore%2Fapi%2Fmediavision.git

[testsuite/stream_infer] Support hand gesture model and add stream_infer

The hand gesture model consists of two models. The first takes an image
as input and outputs a hand segmentation map and heatmaps. The second
takes the filtered heatmaps, as a raw tensor buffer, as input and
outputs coordinates and a gesture. To support this, the following APIs
are added or extended:
mv_source_fill_by_tensor_buffer()
mv_inference_hand_detect()
mv_inference_pose_estimation_detect()

To test the models, the stream_infer testsuite is added.

Change-Id: Id150dd893c229e2a207e099b46f8d53b029e291f
Signed-off-by: Tae-Young Chung
---
diff --git a/include/mv_common.h b/include/mv_common.h
index dc2faf88..f1244c71 100644
--- a/include/mv_common.h
+++ b/include/mv_common.h
@@ -19,6 +19,8 @@
 #include

+#include
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@@ -245,6 +247,16 @@ int mv_source_fill_by_buffer(
 		unsigned int image_height,
 		mv_colorspace_e image_colorspace);

+int mv_source_fill_by_tensor_buffer(
+		mv_source_h source,
+		void *data_buffer,
+		mv_inference_data_type_e type, // a common type may be required; it would then be converted to the inference data type
+		unsigned int buffer_size,
+		unsigned int width,
+		unsigned int height,
+		unsigned int channel,
+		unsigned int dimension);
+
 /**
  * @brief Clears the buffer of the media source.
  *
@@ -285,6 +297,10 @@ int mv_source_get_buffer(
 		unsigned char **data_buffer,
 		unsigned int *buffer_size);

+int mv_source_get_tensor_buffer(
+		mv_source_h source,
+		void **data_buffer,
+		unsigned int *buffer_size);
 /**
  * @brief Gets height of the media source.
  *
@@ -323,6 +339,14 @@ int mv_source_get_width(
 		mv_source_h source,
 		unsigned int *image_width);

+int mv_source_get_channel(
+		mv_source_h source,
+		unsigned int *channel);
+
+int mv_source_get_dimension(
+		mv_source_h source,
+		unsigned int *dims);
+
 /**
  * @brief Gets colorspace of the media source.
  *
@@ -342,6 +366,8 @@ int mv_source_get_colorspace(
 		mv_source_h source,
 		mv_colorspace_e *image_colorspace);

+
+bool mv_source_is_tensor(mv_source_h source);
 /**
  * @brief Creates the handle to the configuration of engine.
  *
diff --git a/include/mv_inference.h b/include/mv_inference.h
index 5af41930..2b606685 100644
--- a/include/mv_inference.h
+++ b/include/mv_inference.h
@@ -724,6 +724,8 @@ int mv_inference_pose_estimation_detect(
 		mv_source_h source,
 		mv_inference_h infer,
 		mv_rectangle_s *roi,
+		float scale_width,
+		float scale_height,
 		mv_inference_pose_estimation_detected_cb detected_cb,
 		void *user_data);

diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h
index dbe0a85e..41a9e9de 100644
--- a/include/mv_inference_type.h
+++ b/include/mv_inference_type.h
@@ -51,7 +51,7 @@ typedef enum {
 * @deprecated Deprecated since 6.0. Use #mv_inference_target_device_e instead.
 * @brief Enumeration for inference target.
* - * @since_tizem 5.5 + * @since_tizen 5.5 * */ typedef enum { diff --git a/mv_common/include/MediaSource.h b/mv_common/include/MediaSource.h index 0bd5e030..c67f4b78 100644 --- a/mv_common/include/MediaSource.h +++ b/mv_common/include/MediaSource.h @@ -119,13 +119,15 @@ public: bool fill(const unsigned char *buffer, unsigned int bufferSize, unsigned int width, unsigned int height, size_t offset); + bool fill(void *buffer, mv_inference_data_type_e type, unsigned int bufferSize, + unsigned int width, unsigned int height, unsigned int channel, unsigned int dimension); /** * @brief Gets data buffer of the MediaSource. * * @since_tizen @if MOBILE 2.4 @else 3.0 @endif * @return Pointer to the data buffer. */ - unsigned char *getBuffer(void) const; + void *getBuffer(void) const; /** * @brief Gets buffer size of the MediaSource. @@ -151,6 +153,9 @@ public: */ unsigned int getHeight(void) const; + unsigned int getChannel(void) const; + + unsigned int getDimension(void) const; /** * @brief Gets image colorspace of the MediaSource. * @@ -159,8 +164,12 @@ public: */ mv_colorspace_e getColorspace(void) const; + mv_inference_data_type_e getType(void) const; + + bool getIsTensor(void) const; + private: - unsigned char *m_pBuffer; /**< The data buffer */ + void *m_pBuffer; /**< The data buffer */ unsigned int m_bufferSize; /**< The buffer size */ @@ -169,6 +178,12 @@ private: unsigned int m_height; /**< The image height */ mv_colorspace_e m_colorspace; /**< The image colorspace */ + + unsigned int m_dim; + unsigned int m_ch; + bool m_isTensor; + mv_inference_data_type_e m_type; + }; } /* Common */ diff --git a/mv_common/include/mv_common_c.h b/mv_common/include/mv_common_c.h index 234fc529..539566c1 100644 --- a/mv_common/include/mv_common_c.h +++ b/mv_common/include/mv_common_c.h @@ -108,6 +108,16 @@ int mv_source_fill_by_buffer_c( unsigned int image_height, mv_colorspace_e image_colorspace); +int mv_source_fill_by_tensor_buffer_c( + mv_source_h source, + void *data_buffer, + mv_inference_data_type_e type, // common type may be required. and then it will be converted to inference data type + unsigned int buffer_size, + unsigned int width, + unsigned int height, + unsigned int channel, + unsigned int dimension); + /** * @brief Clears the buffer of the media source. * @@ -146,6 +156,11 @@ int mv_source_get_buffer_c( unsigned char **data_buffer, unsigned int *buffer_size); +int mv_source_get_tensor_buffer_c( + mv_source_h source, + void **buffer, + unsigned int *size); + /** * @brief Gets height of the media source. * @@ -182,6 +197,14 @@ int mv_source_get_width_c( mv_source_h source, unsigned int *image_width); +int mv_source_get_channel_c( + mv_source_h source, + unsigned int *channel); + + +int mv_source_get_dimension_c( + mv_source_h source, + unsigned int *dims); /** * @brief Gets colorspace of the media source. * @@ -200,6 +223,9 @@ int mv_source_get_colorspace_c( mv_source_h source, mv_colorspace_e *image_colorspace); +bool mv_source_is_tensor_c(mv_source_h source); + + /** * @brief Creates the handle to the configuration of engine. 
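For illustration, a minimal sketch of how the new tensor path is driven from the public API. The helper name wrap_heatmap_tensor is hypothetical; the 56x56x21 FLOAT32 shape mirrors the heatmap buffer used by the testsuite below, buffer_size is in bytes as in the testsuite's call, and it is assumed that mv_common.h now pulls in the inference data-type enum (that appears to be what the new include at its top is for). Error handling is abbreviated:

#include <stdbool.h>
#include <mv_common.h>

/* Hypothetical helper: wrap a raw float heatmap in an mv_source_h. */
static int wrap_heatmap_tensor(float *heatmap, mv_source_h *out)
{
	mv_source_h src = NULL;
	int err = mv_create_source(&src);
	if (err != MEDIA_VISION_ERROR_NONE)
		return err;

	/* width/height/channel/dimension describe the tensor shape
	 * (56x56x21, 3 dimensions), matching the testsuite's usage. */
	err = mv_source_fill_by_tensor_buffer(src, heatmap,
			MV_INFERENCE_DATA_FLOAT32,
			56 * 56 * 21 * sizeof(float),
			56, 56, 21, 3);
	if (err != MEDIA_VISION_ERROR_NONE) {
		mv_destroy_source(src);
		return err;
	}

	/* The source now identifies itself as a tensor, and the raw
	 * buffer can be read back together with its metadata. */
	if (mv_source_is_tensor(src)) {
		void *data = NULL;
		unsigned int size = 0;
		mv_source_get_tensor_buffer(src, &data, &size);
	}

	*out = src;
	return MEDIA_VISION_ERROR_NONE;
}
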
* diff --git a/mv_common/src/MediaSource.cpp b/mv_common/src/MediaSource.cpp index 50f956c7..cb5fc874 100644 --- a/mv_common/src/MediaSource.cpp +++ b/mv_common/src/MediaSource.cpp @@ -29,7 +29,11 @@ MediaSource::MediaSource() : m_bufferSize(0), m_width(0), m_height(0), - m_colorspace(MEDIA_VISION_COLORSPACE_INVALID) + m_colorspace(MEDIA_VISION_COLORSPACE_INVALID), + m_dim(0), + m_ch(0), + m_isTensor(false), + m_type(MV_INFERENCE_DATA_UINT8) { } @@ -66,6 +70,7 @@ bool MediaSource::alloc(unsigned int bufferSize, "the media source %p", colorspace, this); m_colorspace = colorspace; + m_type = MV_INFERENCE_DATA_UINT8; return true; } @@ -73,7 +78,10 @@ void MediaSource::clear(void) { if (m_pBuffer != NULL) { LOGD("Delete internal buffer for media source %p", this); - delete[] m_pBuffer; + if (m_type == MV_INFERENCE_DATA_FLOAT32) + delete[] static_cast(m_pBuffer); + else + delete[] static_cast(m_pBuffer); } LOGD("Set defaults for media source %p : buffer = NULL; " "bufferSize = 0; width = 0; height = 0; " @@ -83,6 +91,10 @@ void MediaSource::clear(void) m_width = 0; m_height = 0; m_colorspace = MEDIA_VISION_COLORSPACE_INVALID; + m_ch = 0; + m_dim = 0; + m_isTensor = false; + m_type = MV_INFERENCE_DATA_UINT8; } bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize, @@ -121,6 +133,7 @@ bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize, LOGD("Assign new colorspace (%i) of the internal buffer image for " "the media source %p", colorspace, this); m_colorspace = colorspace; + m_type = MV_INFERENCE_DATA_UINT8; return true; } @@ -144,18 +157,74 @@ bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize, LOGD("Copy data from external buffer (%p) to the internal buffer (%p + %zd) of " "media source %p", buffer, m_pBuffer, offset, this); - std::memcpy(m_pBuffer + offset, buffer, bufferSize); + std::memcpy(static_cast(m_pBuffer) + offset, buffer, bufferSize); LOGD("size is %ui x %ui [%ui] on buffer(%p).", width, height, bufferSize, this); + m_type = MV_INFERENCE_DATA_UINT8; + + return true; +} + +bool MediaSource::fill(void *buffer, mv_inference_data_type_e type, unsigned int bufferSize, + unsigned int width, unsigned int height, unsigned int channel, unsigned int dimension) +{ + if (bufferSize == 0 || buffer == NULL) + return false; + + LOGD("Call clear() first for media source %p", this); + clear(); + + LOGD("Allocate memory [%i] for buffer in media source %p", bufferSize, this); + LOGD("Assign new size (%ui x %ui) of the internal buffer image for " + "the media source %p", width, height, this); + LOGD("Assign new buffer with type (%i) of the internal buffer for " + "the source %p", type, this); + if (type == MV_INFERENCE_DATA_FLOAT32) { + m_pBuffer = new (std::nothrow)float[bufferSize]; + } else if (type == MV_INFERENCE_DATA_UINT8) { + m_pBuffer = new (std::nothrow)unsigned char[bufferSize]; + } + + if (m_pBuffer == NULL) { + LOGE("Memory allocating for buffer in media source %p failed!", this); + return false; + } + + LOGD("Copy data from external buffer (%p) to the internal buffer (%p) of " + "media source %p", buffer, m_pBuffer, this); + std::memcpy(m_pBuffer, buffer, bufferSize); + + LOGD("Assign new size of the internal buffer of media source %p. 
" + "New size is %ui.", this, bufferSize); + m_bufferSize = bufferSize; + + LOGD("Assign new size (%ui x %ui x %ui) of the internal buffer image for " + "the media source %p", width, height, channel, this); + m_width = width; + m_height = height; + m_ch = channel; + m_dim = dimension; + m_isTensor = true; + m_type = type; + + LOGD("Assign new colorspace (%i) of the internal buffer image for " + "the media source %p", MEDIA_VISION_COLORSPACE_INVALID, this); + m_colorspace = MEDIA_VISION_COLORSPACE_INVALID; + return true; } -unsigned char *MediaSource::getBuffer(void) const +void *MediaSource::getBuffer(void) const { return m_pBuffer; } +mv_inference_data_type_e MediaSource::getType(void) const +{ + return m_type; +} + unsigned int MediaSource::getBufferSize(void) const { return m_bufferSize; @@ -171,10 +240,24 @@ unsigned int MediaSource::getHeight(void) const return m_height; } +unsigned int MediaSource::getChannel(void) const +{ + return m_ch; +} + +unsigned int MediaSource::getDimension(void) const +{ + return m_dim; +} + mv_colorspace_e MediaSource::getColorspace(void) const { return m_colorspace; } +bool MediaSource::getIsTensor(void) const +{ + return m_isTensor; +} } /* Common */ } /* MediaVision */ diff --git a/mv_common/src/mv_common.c b/mv_common/src/mv_common.c index c01536d0..283a19de 100644 --- a/mv_common/src/mv_common.c +++ b/mv_common/src/mv_common.c @@ -80,6 +80,29 @@ int mv_source_fill_by_buffer( return ret; } +int mv_source_fill_by_tensor_buffer( + mv_source_h source, + void *data_buffer, + mv_inference_data_type_e type, + unsigned int buffer_size, + unsigned int width, + unsigned int height, + unsigned int channel, + unsigned int dimension) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_fill_by_tensor_buffer_c( + source, data_buffer, type, buffer_size, width, height, + channel, dimension); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_clear( mv_source_h source) { @@ -110,6 +133,23 @@ int mv_source_get_buffer( return ret; } +int mv_source_get_tensor_buffer( + mv_source_h source, + void **data_buffer, + unsigned int *buffer_size) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + MEDIA_VISION_NULL_ARG_CHECK(buffer_size); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_tensor_buffer_c(source, data_buffer, buffer_size); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_height( mv_source_h source, unsigned int *image_height) @@ -140,6 +180,36 @@ int mv_source_get_width( return ret; } +int mv_source_get_channel( + mv_source_h source, + unsigned int *channel) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(channel); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_channel_c(source, channel); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_source_get_dimension( + mv_source_h source, + unsigned int *dims) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(dims); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_dimension_c(source, dims); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_colorspace( 
 		mv_source_h source,
 		mv_colorspace_e *image_colorspace)
@@ -155,6 +225,19 @@ int mv_source_get_colorspace(
 	return ret;
 }

+bool mv_source_is_tensor(mv_source_h source)
+{
+	MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(source);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+	bool ret = mv_source_is_tensor_c(source);
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return ret;
+}
+
+
 int mv_create_engine_config(
 		mv_engine_config_h *engine_cfg)
 {
diff --git a/mv_common/src/mv_common_c.cpp b/mv_common/src/mv_common_c.cpp
index 28ba8382..50b9d860 100644
--- a/mv_common/src/mv_common_c.cpp
+++ b/mv_common/src/mv_common_c.cpp
@@ -232,6 +232,34 @@ int mv_source_fill_by_buffer_c(
 	return MEDIA_VISION_ERROR_NONE;
 }

+int mv_source_fill_by_tensor_buffer_c(
+		mv_source_h source,
+		void *data_buffer,
+		mv_inference_data_type_e type,
+		unsigned int buffer_size,
+		unsigned int width,
+		unsigned int height,
+		unsigned int channel,
+		unsigned int dimension)
+{
+	if (!source || buffer_size == 0 || data_buffer == NULL) {
+		LOGE("Media source can't be filled by tensor buffer because "
+			"the source or data_buffer is NULL, or buffer_size = 0. "
+			"source = %p; data_buffer = %p; buffer_size = %u",
+			source, data_buffer, buffer_size);
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	if (!(static_cast(source))->fill(data_buffer,
+		type, buffer_size, width, height, channel, dimension)) {
+		LOGE("mv_source_h filling from tensor buffer failed");
+		return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+	}
+
+	LOGD("Media source has been filled from buffer");
+	return MEDIA_VISION_ERROR_NONE;
+}
+
 int mv_source_clear_c(
 		mv_source_h source)
 {
@@ -258,6 +286,25 @@ int mv_source_get_buffer_c(
 	}

 	LOGD("Get media vision source [%p] buffer and buffer size to be returned", source);
+	*buffer = static_cast((static_cast(source))->getBuffer());
 	*size = (static_cast(source))->getBufferSize();
 	LOGD("Media vision source [%p] buffer (%p) and buffer size (%ui) has been returned", source, buffer, *size);

 	return MEDIA_VISION_ERROR_NONE;
 }

+int mv_source_get_tensor_buffer_c(
+		mv_source_h source,
+		void **buffer,
+		unsigned int *size)
+{
+	if (!source) {
+		LOGE("Impossible to get buffer for NULL mv_source_h handle");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	LOGD("Get media vision source [%p] buffer and buffer size to be returned", source);
+	*buffer = (static_cast(source))->getBuffer();
 	*size = (static_cast(source))->getBufferSize();
 	LOGD("Media vision source [%p] buffer (%p) and buffer size (%ui) has been returned", source, buffer, *size);
@@ -265,6 +312,7 @@ int mv_source_get_buffer_c(
 	return MEDIA_VISION_ERROR_NONE;
 }

+
 int mv_source_get_height_c(
 		mv_source_h source,
 		unsigned int *height)
@@ -297,6 +345,39 @@ int mv_source_get_width_c(
 	return MEDIA_VISION_ERROR_NONE;
 }

+int mv_source_get_channel_c(
+		mv_source_h source,
+		unsigned int *channel)
+{
+	if (!source) {
+		LOGE("Impossible to get channel for NULL mv_source_h handle");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	LOGD("Get media vision source [%p] channel to be returned", source);
+	*channel = (static_cast(source))->getChannel();
+	LOGD("Media vision source [%p] channel (%ui) has been returned", source, *channel);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_source_get_dimension_c(
+		mv_source_h source,
+		unsigned int *dims)
+{
+	if (!source) {
+		LOGE("Impossible to get dimension for NULL mv_source_h handle");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	LOGD("Get media vision source [%p] dims to be
returned", source); + *dims = (static_cast(source))->getDimension(); + LOGD("Media vision source [%p] dims (%ui) has been returned", source, *dims); + + return MEDIA_VISION_ERROR_NONE; +} + + int mv_source_get_colorspace_c( mv_source_h source, mv_colorspace_e *colorspace) @@ -313,6 +394,23 @@ int mv_source_get_colorspace_c( return MEDIA_VISION_ERROR_NONE; } +bool mv_source_is_tensor_c(mv_source_h source) +{ + // int mv_source_is_tensor_c(mv_source_h source, bool *isTensor) + /* + if (!source) { + LOGE("mv_source_h handle is NULL"); + return ; + } + */ + + LOGD("Get media vision source [%p]", source); + bool ret = (static_cast(source))->getIsTensor(); + LOGD("Media vision source [%p] is %s", source, ret ? "tensor" : "not tensor"); + + return ret; +} + int mv_create_engine_config_c( mv_engine_config_h *engine_cfg) { diff --git a/mv_inference/inference/CMakeLists.txt b/mv_inference/inference/CMakeLists.txt index 362bc9fb..0b8522b2 100644 --- a/mv_inference/inference/CMakeLists.txt +++ b/mv_inference/inference/CMakeLists.txt @@ -28,7 +28,7 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXXFLAGS} -Wno-unused-parameter FILE(GLOB MV_INFERENCE_INCLUDE_LIST "${PROJECT_SOURCE_DIR}/include/*.h" "${PROJECT_SOURCE_DIR}/include/*.hpp") FILE(GLOB MV_INFERENCE_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") -FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc) +FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc imgcodecs) if(NOT OpenCV_FOUND) MESSAGE(SEND_ERROR "OpenCV NOT FOUND") RETURN() diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index 4c126ce9..7422a0ab 100755 --- a/mv_inference/inference/include/Inference.h +++ b/mv_inference/inference/include/Inference.h @@ -62,13 +62,15 @@ typedef struct _FacialLandMarkDetectionResults { typedef struct _PoseEstimationResults { int number_of_pose_estimation; - std::vector locations; + std::vector locations; + int gesture; } PoseEstimationResults; /**< structure PoseEstimationResults */ typedef struct _HandDetectionResults { int number_of_hands; std::vector confidences; std::vector locations; + void* outputTensorData; } HandDetectionResults; /**< structure HandDetectionResults */ namespace mediavision { @@ -345,6 +347,8 @@ private: cv::Size mSourceSize; cv::Mat mInputBuffer; + cv::Mat mHeatMapMatrix; + mv_engine_config_h engine_config; InferenceEngineCommon * mBackend; diff --git a/mv_inference/inference/include/mv_inference_open.h b/mv_inference/inference/include/mv_inference_open.h index 3bdc5593..20c7d7c8 100755 --- a/mv_inference/inference/include/mv_inference_open.h +++ b/mv_inference/inference/include/mv_inference_open.h @@ -530,6 +530,8 @@ int mv_inference_pose_estimation_detect_open( mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data); diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index c7b5d2f2..da88c532 100755 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -17,7 +17,7 @@ #include "mv_private.h" #include "Inference.h" #include "InferenceIni.h" - +#include #include #include @@ -686,6 +686,7 @@ int Inference::FillOutputResult(tensor_t &outputData) } outputData.data.push_back((void *)mOutputTensorBuffers[i].buffer); + LOGI("%p", mOutputTensorBuffers[i].buffer); } return MEDIA_VISION_ERROR_NONE; @@ -870,9 +871,9 @@ int Inference::Run(std::vector &mvSources, 
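/* Data handoff between the two models in this patch: GetHandDetectionResults()
 * masks the heatmap output with the segmented hand region, merges the channels
 * into mHeatMapMatrix and publishes its raw pointer through
 * HandDetectionResults::outputTensorData; mv_inference_hand_detect_open() then
 * copies 56x56x21 floats into the caller-provided user_data, which the caller
 * wraps with mv_source_fill_by_tensor_buffer() and feeds to the second model. */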
std::vector &mvSources, std::vector(&buffer), &bufferSize)) + return MEDIA_VISION_ERROR_INTERNAL; - // TODO. Let's support various color spaces. + // TODO. Let's support various color spaces. - if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) { - LOGE("Not Supported format!\n"); - return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; - } + if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) { + LOGE("Not Supported format!\n"); + return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; + } - if (roi == NULL) { - cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer).clone(); - } else { - cvRoi.x = roi->point.x; - cvRoi.y = roi->point.y; - cvRoi.width = (roi->point.x + roi->width) >= width ? width - roi->point.x : roi->width; - cvRoi.height = (roi->point.y + roi->height) >= height ? height - roi->point.y : roi->height; - cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer)(cvRoi).clone(); - } + if (roi == NULL) { + cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer).clone(); + } else { + cvRoi.x = roi->point.x; + cvRoi.y = roi->point.y; + cvRoi.width = (roi->point.x + roi->width) >= width ? width - roi->point.x : roi->width; + cvRoi.height = (roi->point.y + roi->height) >= height ? height - roi->point.y : roi->height; + cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer)(cvRoi).clone(); + } - LOGE("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height); + LOGE("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height); - if (mCh != 1 && mCh != 3) { - LOGE("Channel not supported."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; + if (mCh != 1 && mCh != 3) { + LOGE("Channel not supported."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } } + + std::vector::iterator iter; for (iter = mInputTensorBuffers.begin(); iter != mInputTensorBuffers.end(); iter++) { inference_engine_tensor_buffer tensor_buffer = *iter; - int data_type = ConvertToCv(tensor_buffer.data_type); + if (mv_source_is_tensor(mvSource)) { + memcpy(tensor_buffer.buffer, buffer, bufferSize); + } else { - // Convert color space of input tensor data and then normalize it. - ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, data_type, tensor_buffer.buffer), data_type); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to preprocess input tensor data."); - return ret; + int data_type = ConvertToCv(tensor_buffer.data_type); + + // Convert color space of input tensor data and then normalize it. 
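+			// Two input paths from here on: a tensor source was already
+			// copied verbatim into the input tensor buffer by the memcpy
+			// branch above, while an image source goes through Preprocess()
+			// below, which converts color space, resizes and normalizes
+			// into the backend's expected input layout.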
+ ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, data_type, tensor_buffer.buffer), data_type); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to preprocess input tensor data."); + return ret; + } } } @@ -1262,6 +1279,7 @@ int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectio std::vector> inferDimInfo(outputData.dimInfo); std::vector inferResults(outputData.data.begin(), outputData.data.end()); +#if 0 long number_of_pose = inferDimInfo[0][3]; float * tmp = static_cast(inferResults[0]); cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); @@ -1297,6 +1315,51 @@ int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectio *detectionResults = results; LOGE("Inference: PoseEstimationResults: %d\n", results.number_of_pose_estimation); + + #else + for (int k = 0; k < inferDimInfo.size(); ++k) { + LOGI("output: %d", k); + LOGI("addr: %p", inferResults[k]); + for(int d = 0; d < inferDimInfo[k].size(); ++d) { + LOGI("%d", inferDimInfo[k][d]); + } + LOGI("\n"); + } + + //float* coordsPtr = reinterpret_cast(inferResults[0]); + int64_t * gesturePtr = reinterpret_cast(inferResults[1]); + //LOGI("%lld", gesturePtr[0]); + cv::Mat gestureOut(cv::Size(inferDimInfo[1][0], inferDimInfo[1][1]), CV_8UC(sizeof(int64_t)), gesturePtr); + cv::Mat gestureReshaped = gestureOut.reshape(sizeof(int64_t), inferDimInfo[1][0]); + cv::Mat gestureArr[8]; + cv::split(gestureReshaped, gestureArr); + int gesture = gestureArr[0].at(0); + + //float ratioX = (float)mSourceSize.width;; + //float ratioY = (float)mSourceSize.height; + + int64_t * coordPtr = reinterpret_cast(inferResults[0]); + long number_of_pose = inferDimInfo[0][1]; + cv::Mat coordOut(cv::Size(inferDimInfo[0][0], inferDimInfo[0][1]), CV_32FC(inferDimInfo[0][2]), coordPtr); + + PoseEstimationResults results; + results.number_of_pose_estimation = 0; + results.gesture = (int)gesture; + + for (int poseIdx = 0; poseIdx < number_of_pose; poseIdx++) { + cv::Point2f loc; + cv::Vec2f coord = coordOut.at(0,poseIdx); + LOGI("PoseIdx[%2d]: x[%2f], y[%2f]", poseIdx, coord[0], coord[1]); + + loc.x = coord[0]; + loc.y = coord[1]; + results.locations.push_back(loc); + results.number_of_pose_estimation++; + } + + *detectionResults = results; + + #endif return MEDIA_VISION_ERROR_NONE; } @@ -1311,88 +1374,186 @@ int Inference::GetHandDetectionResults(HandDetectionResults *detectionResults) return ret; } - // In case of object detection, - // a model may apply post-process but others may not. - // Thus, those cases should be hanlded separately. +#if 1 std::vector> inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - std::vector inferResults(outputData.data.begin(), outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); - float* boxes = nullptr; - float* classes = nullptr; - float* scores = nullptr; - int number_of_detections = 0; + for (int k = 0; k < inferDimInfo.size(); ++k) { + LOGI("output: %d", k); + LOGI("addr: %p", inferResults[k]); + for(int d = 0; d < inferDimInfo[k].size(); ++d) { + LOGI("%d", inferDimInfo[k][d]); + } + LOGI("\n"); + } + HandDetectionResults results; + results.number_of_hands = 0; - cv::Mat cvScores, cvClasses, cvBoxes; - if (outputData.dimInfo.size() == 1) { - // there is no way to know how many objects are detect unless the number of objects aren't - // provided. In the case, each backend should provide the number of results manually. 
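	// (The SSD-style parsing below is superseded: under the new #if 1 path,
	// output 0 is an int64 segmentation/bbox map and output 1 a float heatmap;
	// the map is turned into per-hand markers and the heatmap is masked per hand.)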
- // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are - // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 - // indicats the image id. But it is useless if a batch mode isn't supported. - // So, use the 1st of 7. + // bbox + int64_t * bboxPtr = reinterpret_cast(inferResults[0]); + float* heatmapPtr = reinterpret_cast(inferResults[1]); - number_of_detections = (int)(*reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], CV_32F, outputData.data[0]); + double number_of_results = 0; + double maxValue = 0.0; + cv::Mat kernel = cv::Mat::ones(2,2, CV_8UC1); - // boxes - cv::Mat cvLeft = cvOutputData.col(3).clone(); - cv::Mat cvTop = cvOutputData.col(4).clone(); - cv::Mat cvRight = cvOutputData.col(5).clone(); - cv::Mat cvBottom = cvOutputData.col(6).clone(); + //int maskSz[] = {inferDimInfo[1][2], inferDimInfo[1][1], inferDimInfo[1][3]}; + //int heatMapSz[] = {inferDimInfo[1][2], inferDimInfo[1][1], inferDimInfo[1][3]}; + //dj + cv::Mat bboxOut(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), CV_8UC(sizeof(int64_t)), bboxPtr); + cv::Mat bboxReshaped = bboxOut.reshape(sizeof(int64_t), inferDimInfo[1][2]); + cv::Mat bboxArr[8]; + cv::split(bboxReshaped, bboxArr); + cv::Mat bbox = bboxArr[0]; - cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; - cv::hconcat(cvBoxElems, 4, cvBoxes); + bbox = bbox*255; - // classes - cvClasses = cvOutputData.col(1).clone(); + ////cv::imwrite("/tmp/dumpbbox.jpg",bbox); + cv::Mat bboxOpened, bboxClosed; + cv::Mat backGround, distTransformed, foreGroundF, foreGroundU; + cv::Mat unKnown, markers; - // scores - cvScores = cvOutputData.col(2).clone(); + // opening + cv::morphologyEx(bbox, bboxOpened, cv::MORPH_OPEN, kernel); + // dilate + cv::dilate(bboxOpened, backGround, kernel); - boxes = cvBoxes.ptr(0); - classes = cvClasses.ptr(0); - scores = cvScores.ptr(0); + // get euclidean distance by distance transform + cv::distanceTransform(bboxOpened, distTransformed, cv::DIST_L2, 5); // euclidean dist. 
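+	// Marker preparation follows the classic watershed-style recipe: opening
+	// removes speckle, dilating the opened mask gives sure-background,
+	// thresholding the distance transform at 50% of its maximum gives
+	// sure-foreground, their difference is the unknown region, and
+	// connectedComponents() labels one marker per hand blob.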
- } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); - number_of_detections = (int)(*reinterpret_cast(inferResults[3])); - } + cv::minMaxLoc(distTransformed, NULL, &maxValue, NULL, NULL); + LOGI("max value of distTransformed: %f", maxValue); + cv::threshold(distTransformed, foreGroundF, 0.5*maxValue, 255, cv::THRESH_BINARY); - int left, top, right, bottom; - cv::Rect loc; + LOGI("type of foreGround: %d\n", foreGroundF.type()); //#define CV_8F 5 + LOGI("type of backGround: %d\n", backGround.type()); //#define CV_8U 0 + foreGroundF.convertTo(foreGroundU, CV_8U); + LOGI("type of backGround: %d\n", foreGroundU.type()); //#define CV_8U 0 + cv::subtract(backGround, foreGroundU , unKnown); + LOGI("type of unKnown: %d\n", unKnown.type()); // - HandDetectionResults results; - results.number_of_hands = 0; - for (int idx = 0; idx < number_of_detections; ++idx) { - if (scores[idx] < mThreshold) - continue; - left = (int)(boxes[idx*4 + 1] * mSourceSize.width); - top = (int)(boxes[idx*4 + 0] * mSourceSize.height); - right = (int)(boxes[idx*4 + 3] * mSourceSize.width); - bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height); + cv::connectedComponents(foreGroundU, markers); - loc.x = left; - loc.y = top; - loc.width = right -left + 1; - loc.height = bottom - top + 1; + markers += 1; - results.confidences.push_back(scores[idx]); - results.locations.push_back(loc); - results.number_of_hands++; + markers.setTo(0, markers==255); - LOGI("confidence:%f", scores[idx]); - LOGI("class: %f", classes[idx]); - LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx*4 + 1], boxes[idx*4 + 0], boxes[idx*4 + 3], boxes[idx*4 + 2]); - LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom); + cv::minMaxLoc(markers, NULL, &number_of_results, NULL, NULL); + //results.number_of_pose_estimation = static_cast(number_of_results)-1; + LOGI("number_of_results: %d", static_cast(number_of_results)-1); + + //cv::Mat heatMap = cv::Mat(3, heatMapSz, CV_32FC1, heatmapPtr); + cv::Mat heatMap = cv::Mat(cv::Size(inferDimInfo[1][2],inferDimInfo[1][1]), CV_32FC(inferDimInfo[1][3]), heatmapPtr); + cv::Mat heatMapReshape = heatMap.reshape(inferDimInfo[1][3], inferDimInfo[1][2]); + cv::Mat heatMapReshapeArr[inferDimInfo[1][3]]; + cv::Mat heatMapMatrixArr[inferDimInfo[1][3]]; + cv::split(heatMapReshape, heatMapReshapeArr); + + + cv::Mat mask = cv::Mat::zeros(inferDimInfo[0][2], inferDimInfo[0][1], CV_8UC1); + cv::Mat maskImg = cv::Mat::zeros(inferDimInfo[0][2], inferDimInfo[0][1], CV_8UC1); + cv::Mat kernel2 = cv::Mat::ones(3,3, CV_8UC1); + + + float ratioX = (float)mSourceSize.width / (float)inferDimInfo[0][2]; + float ratioY = (float)mSourceSize.height / (float)inferDimInfo[0][1]; + for (int resultIdx = 0; resultIdx < (static_cast(number_of_results)-1); resultIdx++) { + mask.setTo(1, markers==(resultIdx + 2)); + + cv::Mat maskIdx; + findNonZero(mask, maskIdx); + int numIdx = maskIdx.total(); + LOGI("type of maskIdx: %d, and ch: %d, total: %d points\n", mask.type(), mask.channels(), numIdx ); + int minX, minY; + int maxX, maxY; + int maxBBSize = 0; + int maxBBSizeIdx = -1; + minX = minY = 100; + maxX = maxY = -1; + + for (int idx = 0; idx < numIdx; ++idx) { + //LOGI("%d, %d", mask.at(0).x, mask.at(0).y); + if (maskIdx.at(idx).x < minX) { + minX = maskIdx.at(idx).x; + } + + if (maskIdx.at(idx).y < minY) { + minY = maskIdx.at(idx).y; + } + + if (maskIdx.at(idx).x >= maxX) { + maxX = maskIdx.at(idx).x; + } + + if (maskIdx.at(idx).y >= 
maxY) { + maxY = maskIdx.at(idx).y; + } + } + + int bbSize = (maxX - minX) * (maxY - maxX); + if (bbSize > maxBBSize) { + maxBBSize = bbSize; + maxBBSizeIdx = resultIdx; + + minX = (int)((float)minX * ratioX); + maxX = (int)((float)maxX * ratioX); + minY = (int)((float)minY * ratioY); + maxY = (int)((float)maxY * ratioY); + results.locations.push_back(cv::Rect(minX, minY, (maxX - minX +1), (maxY-minY +1))); + } + + LOGI("(%d,%d) - (%d,%d): size %d(idx:%d)", minX, minY, maxX, maxY, maxBBSize, maxBBSizeIdx); + + cv::dilate(mask, maskImg, kernel2, cv::Point(-1,-1), 4); + + /* + cv::Mat maskImg3d(3, maskSz, CV_32FC1); + + //cv::Mat maskImg3d = maskImg.reshape(1, 3, sz); + + for (int d1 = 0; d1 < inferDimInfo[1][2]; ++d1) { + for (int d2 = 0; d2 < inferDimInfo[1][1]; ++d2) { + for (int d3 = 0; d3 < inferDimInfo[0][3]; ++d3) { + maskImg3d.at(d1, d2, d3) = maskImg.at(d1, d2); + } + } + } + */ + + cv::Mat maskImgF; + maskImg.convertTo(maskImgF, CV_32FC1); + for (int ch = 0; ch < inferDimInfo[1][3]; ++ch ) { + cv::multiply(heatMapReshapeArr[ch], maskImgF, heatMapMatrixArr[ch]); + //char dumpName[1024]; + + //snprintf(dumpName, 1024, "/tmp/heatmapDump_%d.csv", k); + double maxVal; + minMaxLoc(heatMapMatrixArr[ch], NULL, &maxVal, NULL, NULL); + LOGI("%d: %f", ch, maxVal); + /* + snprintf(dumpName, 1024, "/tmp/heatmapDump_%d.csv", k); + LOGI("%s", dumpName); + std::ofstream dumpFile; + dumpFile.open(dumpName); + LOGI("tempArr shape: %dx%d", tempArr[k].size[0], tempArr[k].size[1]); + dumpFile << cv::format(tempArr[k], cv::Formatter::FMT_CSV) << std::endl; + dumpFile.close(); + */ + } + + cv::merge(heatMapMatrixArr, inferDimInfo[1][3], mHeatMapMatrix); } + LOGE("heatmapMatrix: type[%d], size[%d], elemSize[%d]", mHeatMapMatrix.type(), + mHeatMapMatrix.total(), mHeatMapMatrix.elemSize()); + + results.number_of_hands = static_cast(number_of_results)-1; + results.outputTensorData = mHeatMapMatrix.ptr(); + LOGE("mHeatMapMatrix: %p", results.outputTensorData); *detectionResults = results; +#endif + LOGE("Inference: GetHandDetectionResults: %d\n", results.number_of_hands); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_inference/inference/src/mv_inference.c b/mv_inference/inference/src/mv_inference.c index c08339c8..19fe9c83 100755 --- a/mv_inference/inference/src/mv_inference.c +++ b/mv_inference/inference/src/mv_inference.c @@ -319,6 +319,8 @@ int mv_inference_pose_estimation_detect( mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data) { @@ -336,8 +338,8 @@ int mv_inference_pose_estimation_detect( ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data); */ #else - - ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data); + LOGE("%p", user_data); + ret = mv_inference_pose_estimation_detect_open(source, infer, roi, scale_width, scale_height, detected_cb, user_data); MEDIA_VISION_FUNCTION_LEAVE(); diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp index 2de002a2..957bbd7c 100755 --- a/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_inference/inference/src/mv_inference_open.cpp @@ -23,6 +23,9 @@ #include #include +#include +#include + using namespace mediavision::inference; static int check_mv_inference_engine_version(mv_engine_config_h engine_config, bool *is_new_version) @@ -811,6 +814,8 @@ int mv_inference_pose_estimation_detect_open( mv_source_h source, 
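 		/* scale_width/scale_height map the model-space joint coordinates back
 		 * to display space: each reported x/y below is multiplied by them
 		 * (the testsuite passes 225.f for its 225x225 dump image, and
 		 * 1.f/1.f for an identity mapping). */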
mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data) { @@ -843,12 +848,40 @@ int mv_inference_pose_estimation_detect_open( std::vector locations(numberOfPoseEstimation); + unsigned int tmpWidth = 0; + unsigned int tmpHeight = 0; + unsigned char *buffer = NULL; + unsigned int size = 0; + + mv_source_h* tmpSource = (mv_source_h*)(user_data); + cv::Mat dumpMap; + if(user_data) { + mv_source_get_width(*tmpSource, &tmpWidth); + mv_source_get_height(*tmpSource, &tmpHeight); + LOGE("%d, %d", tmpWidth, tmpHeight); + mv_source_get_buffer(*tmpSource, &buffer, &size); + LOGE("%p", buffer); + dumpMap = cv::Mat(cv::Size(225,225), CV_8UC3, buffer); + } else { + LOGE("user_data is NULL"); + } + for (int n = 0; n < numberOfPoseEstimation; ++n) { - locations[n].x = poseEstimationResults.locations[n].x; - locations[n].y = poseEstimationResults.locations[n].y; - } + locations[n].x = (int)(poseEstimationResults.locations[n].x * scale_width); + locations[n].y = (int)(poseEstimationResults.locations[n].y * scale_height); + cv::Point point((int)(poseEstimationResults.locations[n].x * scale_width), + (int)(poseEstimationResults.locations[n].y * scale_height)); + //cv::drawMarker(dumpMap, point, cv::Scalar(0,255,0), cv::MARKER_DIAMOND, ); + if(user_data) { + cv::circle(dumpMap, point, 1, cv::Scalar(0,255,0), 2); + } + } + if (user_data) { + cv::cvtColor(dumpMap, dumpMap, cv::COLOR_RGB2BGR); + cv::imwrite("/tmp/dumpOut.jpg", dumpMap); + } detected_cb(source, numberOfPoseEstimation, locations.data(), user_data); return ret; @@ -884,17 +917,32 @@ int mv_inference_hand_detect_open( numberOfOutputs = handDetectionResults.number_of_hands; + LOGW("numberOfOutputs: %d", numberOfOutputs); float *confidences = handDetectionResults.confidences.data(); + LOGW("done"); std::vector locations(numberOfOutputs); - for (int n = 0; n < numberOfOutputs; ++n) { - locations[n].point.x = handDetectionResults.locations[n].x; - locations[n].point.y = handDetectionResults.locations[n].y; - locations[n].width = handDetectionResults.locations[n].width; - locations[n].height = handDetectionResults.locations[n].height; + LOGE("user_data:%p", user_data); + LOGE("outputTensorData:%p", handDetectionResults.outputTensorData); + if (user_data) { + locations.clear(); + std::vector().swap(locations); + if (handDetectionResults.outputTensorData) { + LOGW("try to get outputTensorData: %zd", sizeof(float)); + memcpy(user_data, handDetectionResults.outputTensorData, sizeof(float)*(56*56*21)); + } else { + LOGW("outputTensorData is NULL"); + } + } else { + for (int n = 0; n < numberOfOutputs; ++n) { + locations[n].point.x = handDetectionResults.locations[n].x; + locations[n].point.y = handDetectionResults.locations[n].y; + locations[n].width = handDetectionResults.locations[n].width; + locations[n].height = handDetectionResults.locations[n].height; + } } - detected_cb(source, numberOfOutputs, confidences, locations.data(), user_data); + detected_cb(source, numberOfOutputs, confidences, user_data == NULL ? 
locations.data() : NULL, user_data); return ret; -} \ No newline at end of file +} diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 2fdf73f4..9e15c792 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -25,11 +25,22 @@ BuildRequires: libavutil-devel BuildRequires: pkgconfig(gstreamer-1.0) BuildRequires: pkgconfig(gstreamer-base-1.0) BuildRequires: pkgconfig(gstreamer-app-1.0) +BuildRequires: pkgconfig(gstreamer-video-1.0) +BuildRequires: pkgconfig(cairo) BuildRequires: pkgconfig(libtzplatform-config) BuildRequires: pkgconfig(iniparser) BuildRequires: pkgconfig(ncurses) BuildRequires: pkgconfig(inference-engine-interface-common) +BuildRequires: pkgconfig(tizen-extension-client) +BuildRequires: pkgconfig(elementary) +BuildRequires: pkgconfig(ecore) +BuildRequires: pkgconfig(evas) +BuildRequires: pkgconfig(ecore-wl2) +BuildRequires: pkgconfig(ecore-evas) +BuildRequires: pkgconfig(appcore-efl) +BuildRequires: capi-ui-efl-util-devel + %description Media Vision library for Tizen Native API. Includes barcode detecting, barcode generating, face and image modules. @@ -279,6 +290,7 @@ install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj %TZ_SYS_BIN/mv_image* %TZ_SYS_BIN/mv_surveillance* %TZ_SYS_BIN/mv_infer* +%TZ_SYS_BIN/mv_stream* %if 0%{?gcov:1} %files gcov diff --git a/src/mv_common.c b/src/mv_common.c index c01536d0..80be8779 100644 --- a/src/mv_common.c +++ b/src/mv_common.c @@ -80,6 +80,29 @@ int mv_source_fill_by_buffer( return ret; } +int mv_source_fill_by_tensor_buffer( + mv_source_h source, + void *data_buffer, + mv_inference_data_type_e type, + unsigned int buffer_size, + unsigned int width, + unsigned int height, + unsigned int channel, + unsigned int dimension) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_fill_by_tensor_buffer_c( + source, data_buffer, type, buffer_size, width, height, + channel, dimension); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_clear( mv_source_h source) { @@ -110,6 +133,23 @@ int mv_source_get_buffer( return ret; } +int mv_source_get_tensor_buffer( + mv_source_h source, + void **data_buffer, + unsigned int *buffer_size) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + MEDIA_VISION_NULL_ARG_CHECK(buffer_size); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_tensor_buffer_c(source, data_buffer, buffer_size); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_height( mv_source_h source, unsigned int *image_height) @@ -140,6 +180,36 @@ int mv_source_get_width( return ret; } +int mv_source_get_channel( + mv_source_h source, + unsigned int *channel) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(channel); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_channel_c(source, channel); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_source_get_dimension( + mv_source_h source, + unsigned int *dims) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(dims); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_dimension_c(source, dims); 
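+	/* Like every public wrapper in this file: feature-support gate, handle and
+	 * NULL-argument checks, ENTER/LEAVE tracing, around a delegation to the
+	 * _c() implementation. */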
+ MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_colorspace( mv_source_h source, mv_colorspace_e *image_colorspace) @@ -155,6 +225,19 @@ int mv_source_get_colorspace( return ret; } + +bool mv_source_is_tensor(mv_source_h source) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + + MEDIA_VISION_FUNCTION_ENTER(); + bool ret = mv_source_is_tensor_c(source); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_create_engine_config( mv_engine_config_h *engine_cfg) { diff --git a/src/mv_inference.c b/src/mv_inference.c index 46fe8cb3..efeb49cd 100644 --- a/src/mv_inference.c +++ b/src/mv_inference.c @@ -318,6 +318,8 @@ int mv_inference_pose_estimation_detect( mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data) { @@ -335,11 +337,41 @@ int mv_inference_pose_estimation_detect( ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data); */ #else + LOGE("%p", user_data); + ret = mv_inference_pose_estimation_detect_open(source, infer, roi, scale_width, scale_height, detected_cb, user_data); - ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +#endif +} + +int mv_inference_hand_detect( + mv_source_h source, + mv_inference_h infer, + mv_inference_hand_detected_cb detected_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + /* + ret = mv_inference_hand_detect_lic(source, infer, detected_cb, user_data); + */ +#else + + ret = mv_inference_hand_detect_open(source, infer, detected_cb, user_data); MEDIA_VISION_FUNCTION_LEAVE(); return ret; + #endif } \ No newline at end of file diff --git a/test/testsuites/CMakeLists.txt b/test/testsuites/CMakeLists.txt index 389e6118..29d6cfb3 100644 --- a/test/testsuites/CMakeLists.txt +++ b/test/testsuites/CMakeLists.txt @@ -11,3 +11,4 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/face) add_subdirectory(${PROJECT_SOURCE_DIR}/image) add_subdirectory(${PROJECT_SOURCE_DIR}/surveillance) add_subdirectory(${PROJECT_SOURCE_DIR}/inference) +add_subdirectory(${PROJECT_SOURCE_DIR}/stream_infer) diff --git a/test/testsuites/inference/inference_test_suite.c b/test/testsuites/inference/inference_test_suite.c index 648c085f..420ac981 100644 --- a/test/testsuites/inference/inference_test_suite.c +++ b/test/testsuites/inference/inference_test_suite.c @@ -31,6 +31,7 @@ #include #include + #define FILE_PATH_SIZE 1024 //Image Classification @@ -67,6 +68,11 @@ //Pose Estimation #define PE_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" +#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite" +#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite" +//#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_tf113_int32.tflite" +//#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1_tf113.tflite" + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -82,6 +88,8 @@ #define NANO_PER_MILLI ((__clock_t) 1000000) #define MILLI_PER_SEC ((__clock_t) 1000) +static void * outputTensorData = NULL; + struct timespec diff(struct timespec start, struct timespec end) { struct timespec temp; @@ -166,21 +174,149 @@ void _pose_estimation_detected_cb ( } } + +int perform_tflite_hand_detection2(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine cofniguration.\n"); + } + } + return err; + } + + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"}; + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AIC_2_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(handle, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(handle, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 56); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 56); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 21); + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(handle, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + *engine_cfg = handle; + return err; +} + +void _hand_pose_cb ( + mv_source_h source, + const int number_of_pose_estimation, + const mv_point_s *locations, + void *user_data) +{ + printf("In callback, %d pose estimation\n", number_of_pose_estimation); + if (!user_data) { + for (int n = 0; n < number_of_pose_estimation; n++) { + printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y); + } + } else { + printf("%p\n", user_data); + } +} + void _hand_detected_cb ( mv_source_h source, const int number_of_hands, const float *confidences, const mv_rectangle_s *locations, - void *user_data) + void *user_data) //user_data can be mv_source? 
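/* When user_data is non-NULL it points at the 56x56x21 float heatmap filled in
 * by the first model; the callback below then wraps that buffer in a tensor
 * mv_source_h and chains the second (coordinate/gesture) model, reporting the
 * joints through _hand_pose_cb. With a NULL user_data it simply prints the
 * detected boxes. */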
{ printf("In callback: %d hands\n", number_of_hands); - for (int n = 0; n < number_of_hands; n++) { - printf("%.3f\n", confidences[n]); - printf("%d,%d,%d,%d\n", locations[n].point.x, - locations[n].point.y, - locations[n].width, - locations[n].height); + if (!user_data) { + for (int n = 0; n < number_of_hands; n++) { + printf("%.3f\n", confidences[n]); + printf("%d,%d,%d,%d\n", locations[n].point.x, + locations[n].point.y, + locations[n].width, + locations[n].height); + } + } else { + printf("%p\n", user_data); + + mv_source_h source2; + mv_create_source(&source2); + mv_source_fill_by_tensor_buffer(source2, user_data, MV_INFERENCE_DATA_FLOAT32, + 56 * 56 * 21* sizeof(float), + 56, 56, 21, 3); + + mv_engine_config_h engine_cfg2; + mv_create_engine_config(&engine_cfg2); + + perform_tflite_hand_detection2(&engine_cfg2); + + mv_inference_h infer2; + int err = mv_inference_create(&infer2); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create inference handle [err:%i]\n", err); + } + printf("infer2 created\n"); + + //configure + err = mv_inference_configure(infer2, engine_cfg2); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to configure inference handle [err:%i]\n", err); + } + printf("engine_cfg2 configured\n"); + + //prepare + err = mv_inference_prepare(infer2); + printf("infer2 prepared\n"); + + err = mv_inference_pose_estimation_detect(source2, infer2, NULL, 225.f, 225.f,_hand_pose_cb, (&source)); + printf("pose estimated\n"); + + mv_destroy_source(source2); + printf("destroy source2"); + + mv_inference_destroy(infer2); + mv_destroy_engine_config(engine_cfg2); } } @@ -2552,7 +2688,7 @@ int perform_pose_estimation_detection() clock_gettime(CLOCK_MONOTONIC, &s_tspec); // Pose estimation - err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _pose_estimation_detected_cb, NULL); + err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, 1.f, 1.f, _pose_estimation_detected_cb, NULL); clock_gettime(CLOCK_MONOTONIC, &e_tspec); @@ -2636,9 +2772,10 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg) char *inputNodeName = "input"; char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"}; + outputTensorData = (void*)calloc(56*56*21, sizeof(float)); mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - HD_TFLITE_WEIGHT_PATH); + PE_TFLITE_AIC_1_WEIGHT_PATH); mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, @@ -2685,84 +2822,14 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg) return err; } -int perform_armnn_hand_detection(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine cofniguration.\n"); - } - } - return err; - } - - char *inputNodeName = "input"; - char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"}; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - HD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, - MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, - MV_INFERENCE_MODEL_STD_VALUE, 
- 1.0); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_TARGET_DEVICE_TYPE, - MV_INFERENCE_TARGET_DEVICE_CPU); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_WIDTH, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, - 3); - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute(handle, - MV_INFERENCE_OUTPUT_NODE_NAMES, - outputNodeNames, - 2); - - *engine_cfg = handle; - return err; -} - int perform_hand_detection() { int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[6] = {1, 2, 3, 4, 5, 6}; - const *names[6] = { "Configuration", + const int options[5] = {1, 2, 3, 4, 5}; + const *names[5] = { "Configuration", "TFLITE(CPU) + HandDetection", - "ARMNN(CPU) + HandDetection", "Prepare", "Run", "Back"}; @@ -2772,7 +2839,7 @@ int perform_hand_detection() mv_source_h mvSource = NULL; while(sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, 6); + sel_opt = show_menu("Select Action:", options, names, 5); switch (sel_opt) { case 1: { @@ -2798,17 +2865,6 @@ int perform_hand_detection() } break; case 3: - { - //perform pose estimation config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - err = perform_armnn_hand_detection(&engine_cfg); - } - break; - case 4: { // create - configure - prepare if (infer) { @@ -2841,7 +2897,7 @@ int perform_hand_detection() } } break; - case 5: + case 4: { if (mvSource) { int err2 = mv_destroy_source(mvSource); @@ -2878,7 +2934,11 @@ int perform_hand_detection() clock_gettime(CLOCK_MONOTONIC, &s_tspec); // Hand detection - err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL); + //err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL); + + printf("mem: %p\n", outputTensorData); + //err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _hand_pose_cb, outputTensorData); + err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, outputTensorData); clock_gettime(CLOCK_MONOTONIC, &e_tspec); @@ -2888,7 +2948,7 @@ int perform_hand_detection() break; } - case 6: + case 5: { //perform destroy if (engine_cfg) { @@ -2904,6 +2964,11 @@ int perform_hand_detection() printf("Fail to destroy inference handle [err:%i]\n", err); } } + + if (outputTensorData) { + free(outputTensorData); + outputTensorData = NULL; + } } break; default: @@ -2939,6 +3004,12 @@ int perform_hand_detection() sel_opt = (do_another == 1) ? 
0 : 1; } + if (outputTensorData) { + free(outputTensorData); + outputTensorData = NULL; + } + printf("outputTensorData: %p\n",outputTensorData); + return MEDIA_VISION_ERROR_NONE; } @@ -2957,7 +3028,7 @@ int main() int err = MEDIA_VISION_ERROR_NONE; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, 6); + sel_opt = show_menu("Select Action:", options, names, 7); switch (sel_opt) { case 1: { diff --git a/test/testsuites/stream_infer/CMakeLists.txt b/test/testsuites/stream_infer/CMakeLists.txt new file mode 100644 index 00000000..dc0ab1c0 --- /dev/null +++ b/test/testsuites/stream_infer/CMakeLists.txt @@ -0,0 +1,63 @@ +project(mv_stream_infer) +cmake_minimum_required(VERSION 2.6) + +set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG _DEBUG) + +if(NOT SKIP_WARNINGS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +endif() + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${MV_CAPI_MEDIA_VISION_INC_DIR}) +include_directories(${INC_IMAGE_HELPER}) +include_directories(${INC_VIDEO_HELPER}) +include_directories(${INC_TS_COMMON}) + +file(GLOB MV_STREAMINFER_TEST_SUITE_INC_LIST "${PROJECT_SOURCE_DIR}/*.h") +file(GLOB MV_STREAMINFER_TEST_SUITE_SRC_LIST "${PROJECT_SOURCE_DIR}/*.c") + +find_package(PkgConfig REQUIRED) +pkg_check_modules(GLIB_PKG glib-2.0) + +if (NOT GLIB_PKG_FOUND) + message(SEND_ERROR "Failed to find glib") + return() +else() + include_directories(${GLIB_PKG_INCLUDE_DIRS}) +endif() + + +SET(dependents "gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0 cairo elementary ecore-wl2 appcore-efl capi-ui-efl-util") + +INCLUDE(FindPkgConfig) +pkg_check_modules(${PROJECT_NAME} REQUIRED ${dependents}) +FOREACH(flag ${${PROJECT_NAME}_CFLAGS}) + SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}") +ENDFOREACH(flag) + +SET(CMAKE_C_FLAGS "-I./include -I./include/headers ${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} -fPIC -Wall -DEFL_BETA_API_SUPPORT=1") +SET(CMAKE_C_FLAGS_DEBUG "-O0 -g") + +add_executable(${PROJECT_NAME} + ${MV_STREAMINFER_TEST_SUITE_INC_LIST} + ${MV_STREAMINFER_TEST_SUITE_SRC_LIST} + ${MV_CAPI_MEDIA_VISION_INC_LIST}) + +target_link_libraries(${PROJECT_NAME} ${MV_INFERENCE_LIB_NAME} + gstreamer-1.0 + glib-2.0 + capi-system-info + dlog + mv_image_helper + mv_video_helper + mv_testsuite_common + cairo + m + ${${PROJECT_NAME}_LIBRARIES} + ) + +install(TARGETS ${PROJECT_NAME} DESTINATION ${testbin_dir}) diff --git a/test/testsuites/stream_infer/stream_infer.c b/test/testsuites/stream_infer/stream_infer.c new file mode 100644 index 00000000..dbd8b689 --- /dev/null +++ b/test/testsuites/stream_infer/stream_infer.c @@ -0,0 +1,1772 @@ +/** + * Copyright (c) 2015 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#define _USE_MATH_DEFINES
+#if 0
+#include
+#endif
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define MAX(a, b) \
+({ __typeof__ (a) _a = (a); \
+__typeof__ (b) _b = (b); \
+_a > _b ? _a : _b; })
+
+#define MIN(a,b) \
+({ __typeof__ (a) _a = (a); \
+__typeof__ (b) _b = (b); \
+_a < _b ? _a : _b; })
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#define WIDTH (480)
+#define HEIGHT (270)
+
+#ifdef PACKAGE
+#undef PACKAGE
+#endif
+#define PACKAGE "test"
+
+static int st = 0;
+static Evas_Object *g_eo = NULL;
+static Evas_Object *icon = NULL;
+
+/* for video display */
+static Evas_Object *g_win_id;
+static Evas_Object *selected_win_id;
+
+enum {
+	MODEL_TYPE_POSE_CPM = 0,
+	MODEL_TYPE_POSE_HOURGLASS,
+	MODEL_TYPE_POSE_HAND_AIC,
+	MODEL_TYPE_POSE_HAND_AICLite
+};
+
+typedef struct {
+	gchar *filename;
+	gchar *filename2;
+	int numbuffers;
+	int modelType;
+	Evas_Object *win;
+	Evas_Object *layout_main; /* layout widget based on EDJ */
+	/* add more variables here */
+
+} appdata;
+
+static mv_rectangle_s poseRoi;
+
+static appdata ad;
+static GstBus *bus;
+static guint bus_watch_id;
+
+#define FILE_PATH_SIZE 1024
+
+// pose estimation
+#define PE_TFLITE_CPM_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model_cpm.tflite"
+#define PE_TFLITE_HOURGLASS_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model_hourglass.tflite"
+
+#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite"
+#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite"
+#define PE_TFLITE_AICLite_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_lite_224.tflite"
+#define PE_TFLITE_AICLite_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_lite_224.tflite"
+
+static float thValNeck = 0.3f; // 30%
+static float thValArm = 0.1f; // 10%
+static float thValLeg = 0.2f; // 20%
+
+typedef struct _rect {
+	int left;
+	int top;
+	int right;
+	int bottom;
+	int type;
+	bool updated;
+	bool cr_updated;
+} rect;
+
+
+typedef struct _humanSkeleton {
+	mv_point_s pose[21/*14*/];
+	mv_point_s prevPose[21/*14*/];
+	mv_rectangle_s loc;
+	mv_rectangle_s prevLoc;
+	mv_rectangle_s handRoi;
+	float scores[21/*14*/];
+	bool isPrevPose;
+	bool updated; // detection is found and pose is also good.
then update drawing + bool locUpdated; // track previous detection + bool IsDetected; // detection +} HumanSkeleton; + +typedef struct +{ + gboolean valid; + GstVideoInfo vinfo; +} CairoOverlayState; + +typedef struct +{ + GstBuffer *buffer; + gpointer user_data; +} HandOffData; + +//gchar *gesturenames; + +static HandOffData hdata_p; + +static GMutex pose_mutex; +static guint old_timeout = 0; +static guint nFrames = 0; + +// Gstreamer +GstElement *pipeline, *source, *filter, *toverlay, *sink, *sink2; +GstElement *tee, *vscale, *vsfilter, *vconv, *vcfilter; +GstElement *vrate, *vrfilter, *fsink, *vrsink; +GstElement *queue1, *queue2, *queue3; +GstElement *oconv, *coverlay; +GstElement *vcrop, *vcrscale, *vcrsfilter, *vcrsconv, *vcrscfilter, *vcrssink; +CairoOverlayState *overlay_state; + +GstElement *decodebin, *dscale, *dconv; +GstElement *dsfilter, *dcfilter; + +GstElement *tee2, *enc, *muxmp4, *fsink2, *queue4, *queue5, *encconv; + +//static HandSkeleton handSkeleton; +static HumanSkeleton humanSkeleton; +gulong handler_p; +GList *line_list = NULL; + +mv_source_h mv_src_p; +mv_source_h mv_src_p2; + +// Human pose +mv_engine_config_h hp_mv_engine_cfg; +mv_inference_h hp_mv_infer; + +mv_engine_config_h hp_mv_engine_cfg2; +mv_inference_h hp_mv_infer2; + +static void * outputTensorData; + +FILE *fp; + +static bool IsGestureMode; + +static int poseCropSize = 0; + +#define IMAGE_SIZE_WIDTH 640 +#define IMAGE_SIZE_HEIGHT 480 + +#define NANO_PER_SEC ((__clock_t) 1000000000) +#define NANO_PER_MILLI ((__clock_t) 1000000) +#define MILLI_PER_SEC ((__clock_t) 1000) + +struct timespec diff(struct timespec start, struct timespec end) +{ + struct timespec temp; + if ((end.tv_nsec - start.tv_nsec) < 0) { + temp.tv_sec = end.tv_sec - start.tv_sec - 1; + temp.tv_nsec = NANO_PER_SEC + end.tv_nsec - start.tv_nsec; + } + else { + temp.tv_sec = end.tv_sec - start.tv_sec; + temp.tv_nsec = end.tv_nsec - start.tv_nsec; + } + return temp; +} + +unsigned long gettotalmillisec(const struct timespec time) +{ + return time.tv_sec * MILLI_PER_SEC + time.tv_nsec / NANO_PER_MILLI; +} + + +void int_handler(int sig) +{ + char c; + + signal(sig, SIG_IGN); + while ((getchar()) != '\n'); + + printf(TEXT_YELLOW "Do you want to quit? 
[y/n]\n" TEXT_RESET);
+	c = getchar();
+	if (c == 'y' || c == 'Y') {
+
+		g_signal_handler_disconnect(vrsink, handler_p);
+#if 0
+		g_signal_handler_disconnect(vcrssink, handler_gp);
+#endif
+
+		gst_element_send_event(pipeline, gst_event_new_eos());
+
+		sleep(4);
+
+
+		if (mv_src_p)
+			mv_destroy_source(mv_src_p);
+
+		if (hp_mv_infer)
+			mv_inference_destroy(hp_mv_infer);
+
+		if (hp_mv_engine_cfg)
+			mv_destroy_engine_config(hp_mv_engine_cfg);
+
+		if (mv_src_p2)
+			mv_destroy_source(mv_src_p2);
+
+		if (hp_mv_infer2)
+			mv_inference_destroy(hp_mv_infer2);
+
+		if (hp_mv_engine_cfg2)
+			mv_destroy_engine_config(hp_mv_engine_cfg2);
+
+		if (outputTensorData) {
+			free(outputTensorData);
+			outputTensorData = NULL;
+		}
+		printf(TEXT_YELLOW "exit..\n" TEXT_RESET);
+		signal(SIGINT, SIG_DFL);
+		exit(0);
+	} else {
+		printf("no\n");
+		signal(SIGINT, int_handler);
+	}
+
+	getchar(); // Get new line character
+}
+
+void _hand_pose_cb (
+	mv_source_h source,
+	const int number_of_pose_estimation,
+	const mv_point_s *locations,
+	void *user_data)
+{
+	printf("%d landmarks\n", number_of_pose_estimation);
+	for (int n = 0; n < number_of_pose_estimation; ++n) {
+
+		humanSkeleton.pose[n].x = (int)((float)locations[n].x);
+		humanSkeleton.pose[n].y = (int)((float)locations[n].y);
+		humanSkeleton.scores[n] = 1.0f; /* confidences[n];*/
+
+		//printf("(%d,%d): %f\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y, confidences[n]);
+		//printf("(%d,%d)\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y);
+	}
+	humanSkeleton.IsDetected = true;
+
+}
+
+static void _hand_detected_cb (
+	mv_source_h source,
+	const int number_of_hands,
+	const float *confidences,
+	const mv_rectangle_s *locations,
+	void *user_data) //user_data can be mv_source?
+{
+
+#if 0
+	if (0 /*confidences[1] < thValNeck*/) {
+		printf("lost pose\n");
+		humanSkeleton.IsDetected = false;
+		humanSkeleton.isPrevPose = false;
+		return;
+	}
+
+	printf("%d landmarks, %d crop\n", number_of_landmarks, poseCropSize);
+	for (int n = 0; n < number_of_landmarks; ++n) {
+
+		humanSkeleton.pose[n].x = (int)((float)(locations[n].x + poseRoi.point.x) / (float)poseCropSize * 640.f);
+		humanSkeleton.pose[n].y = (int)((float)(locations[n].y + poseRoi.point.y) / (float)poseCropSize * 480.f);
+		humanSkeleton.scores[n] = 1.0f; /* confidences[n];*/
+
+		//printf("(%d,%d): %f\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y, confidences[n]);
+		printf("(%d,%d)\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y);
+	}
+	humanSkeleton.IsDetected = true;
+#else
+
+	if (number_of_hands <= 0) {
+		humanSkeleton.IsDetected = false;
+		return;
+	}
+
+
+	struct timespec s_tspec;
+	struct timespec e_tspec;
+
+	clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+	// hand the filtered heatmaps (raw tensor) of the first model to the second one
+	mv_source_clear(mv_src_p2);
+	mv_source_fill_by_tensor_buffer(mv_src_p2, user_data,
+				MV_INFERENCE_DATA_FLOAT32,
+				56 * 56 * 21 * sizeof(float),
+				56, 56, 21, 3);
+
+	clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+	struct timespec diffspec = diff(s_tspec, e_tspec);
+	unsigned long timeDiff = gettotalmillisec(diffspec);
+	printf("tensor buffer fill time: %lu(ms)\n", timeDiff);
+
+	clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+	mv_inference_pose_estimation_detect(mv_src_p2, hp_mv_infer2, NULL, 640.f, 480.f, _hand_pose_cb, NULL);
+
+	clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+	diffspec = diff(s_tspec, e_tspec);
+	timeDiff = gettotalmillisec(diffspec);
+	printf("pose_estimation time: %lu(ms)\n", timeDiff);
+	//humanSkeleton.IsDetected = true;
+#endif
+	return;
+}
+
+
+static gboolean
+run_pose (void *user_data)
+{
+	HandOffData *udata = (HandOffData
*)user_data; + if (!GST_IS_BUFFER(udata->buffer)) + return FALSE; + + GstMapInfo map; + + /* + gst_buffer_map(udata->buffer, &map, GST_MAP_READ); + + mv_source_clear(mv_src_p); + + mv_source_fill_by_buffer(mv_src_p, map.data, 192*192*3, 192, 192, MEDIA_VISION_COLORSPACE_RGB888); + + gst_buffer_unmap(udata->buffer, &map); + + clock_t start = clock(); + mv_inference_pose_estimation_detect(mv_src_p, hp_mv_infer, &poseRoi, 1.f, 1.f, _human_pose_cb, NULL); + clock_t end = clock(); + */ + + gst_buffer_map(udata->buffer, &map, GST_MAP_READ); + + mv_source_clear(mv_src_p); + + mv_source_fill_by_buffer(mv_src_p, map.data, 224*224*3, 224, 224, MEDIA_VISION_COLORSPACE_RGB888); + + gst_buffer_unmap(udata->buffer, &map); + + + struct timespec s_tspec; + struct timespec e_tspec; + + void * outputTensorBuffer = (void*)udata->user_data; + + clock_gettime(CLOCK_MONOTONIC, &s_tspec); + + // invoke tflite -> _hand_detected_cb -> memcpy output -> invoke tflite -> _pose_cb + mv_inference_hand_detect(mv_src_p, hp_mv_infer, _hand_detected_cb, outputTensorBuffer); + + clock_gettime(CLOCK_MONOTONIC, &e_tspec); + struct timespec diffspec = diff(s_tspec, e_tspec); + unsigned long timeDiff = gettotalmillisec(diffspec); + printf("detect + pose time: %lu(ms)\n", timeDiff); + + return FALSE; + +} + +static void +_pose_est_handoff(GstElement *object, GstBuffer *buffer, GstPad *pad, gpointer user_data) +{ + + nFrames++; + hdata_p.buffer = buffer; + hdata_p.user_data = user_data; + +#if 0 + if (nFrames % 15 == 0) { + g_mutex_lock(&pose_mutex); + g_idle_add (run_pose, &hdata_p); + g_mutex_unlock(&pose_mutex); + } +#else + g_mutex_lock(&pose_mutex); + g_idle_add (run_pose, &hdata_p); + g_mutex_unlock(&pose_mutex); +#endif +} + +static void +prepare_overlay (GstElement * overlay, GstCaps * caps, gpointer user_data) +{ + CairoOverlayState *state = (CairoOverlayState *) user_data; + + state->valid = gst_video_info_from_caps (&state->vinfo, caps); +} + +/* +static void +draw_overlay (GstElement * overlay, cairo_t * cr, guint64 timestamp, + guint64 duration, gpointer user_data) +{ + CairoOverlayState *s = (CairoOverlayState *) user_data; + + if (!s->valid) { + printf("not ready draw_overlay"); + return; + } + + cairo_set_source_rgba(cr, 0.1, 0.9, 0.0, 0.7); + cairo_set_line_width(cr, 2.0); + + + if (!humanSkeleton.IsDetected) + return; + + + // + if (humanSkeleton.isPrevPose == false) { + humanSkeleton.prevPose[1] = humanSkeleton.pose[1]; + // head - neck + if (humanSkeleton.scores[0] >= thValNeck ) { + humanSkeleton.prevPose[0] = humanSkeleton.pose[0]; + } + + // right arm + if (humanSkeleton.scores[2] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[2] = humanSkeleton.pose[2]; + if (humanSkeleton.scores[3] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[3] = humanSkeleton.pose[3]; + if (humanSkeleton.scores[4] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[4] = humanSkeleton.pose[4]; + } + } + } + + // left arm + if (humanSkeleton.scores[5] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[5] = humanSkeleton.pose[5]; + if (humanSkeleton.scores[6] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[6] = humanSkeleton.pose[6]; + if (humanSkeleton.scores[7] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[7] = humanSkeleton.pose[7]; + } + } + } + + // right leg + if (humanSkeleton.scores[8] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[8] = humanSkeleton.pose[8]; + if 
(humanSkeleton.scores[9] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[9] = humanSkeleton.pose[9]; + if (humanSkeleton.scores[10] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[10] = humanSkeleton.pose[10]; + } + } + } + + // left leg + if (humanSkeleton.scores[11] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[11] = humanSkeleton.pose[11]; + if (humanSkeleton.scores[12] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[12] = humanSkeleton.pose[12]; + if (humanSkeleton.scores[13] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[13] = humanSkeleton.pose[13]; + } + } + } + humanSkeleton.isPrevPose = true; + } else { + // weighted sum of pose and prevPose + // method1: fixed weights (pose : prevPose = 0.7 : 0.3) + float poseWeight = 0.7f; + float prevPoseWeight = 0.3f; + humanSkeleton.prevPose[1].x = (poseWeight * humanSkeleton.pose[1].x + + prevPoseWeight * humanSkeleton.prevPose[1].x); + humanSkeleton.prevPose[1].y = (poseWeight * humanSkeleton.pose[1].y + + prevPoseWeight * humanSkeleton.prevPose[1].y); + // head - neck + if (humanSkeleton.scores[0] >= thValNeck ) { + humanSkeleton.prevPose[0].x = (poseWeight * humanSkeleton.pose[0].x + + prevPoseWeight * humanSkeleton.prevPose[0].x); + humanSkeleton.prevPose[0].y = (poseWeight * humanSkeleton.pose[0].y + + prevPoseWeight * humanSkeleton.prevPose[0].y); + } + + // right arm + if (humanSkeleton.scores[2] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[2].x = (poseWeight * humanSkeleton.pose[2].x + + prevPoseWeight * humanSkeleton.prevPose[2].x); + humanSkeleton.prevPose[2].y = (poseWeight * humanSkeleton.pose[2].y + + prevPoseWeight * humanSkeleton.prevPose[2].y); + if (humanSkeleton.scores[3] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[3].x = (poseWeight * humanSkeleton.pose[3].x + + prevPoseWeight * humanSkeleton.prevPose[3].x); + humanSkeleton.prevPose[3].y = (poseWeight * humanSkeleton.pose[3].y + + prevPoseWeight * humanSkeleton.prevPose[3].y); + if (humanSkeleton.scores[4] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[4].x = (poseWeight * humanSkeleton.pose[4].x + + prevPoseWeight * humanSkeleton.prevPose[4].x); + humanSkeleton.prevPose[4].y = (poseWeight * humanSkeleton.pose[4].y + + prevPoseWeight * humanSkeleton.prevPose[4].y); + } + } + } + + // left arm + if (humanSkeleton.scores[5] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[5].x = (poseWeight * humanSkeleton.pose[5].x + + prevPoseWeight * humanSkeleton.prevPose[5].x); + humanSkeleton.prevPose[5].y = (poseWeight * humanSkeleton.pose[5].y + + prevPoseWeight * humanSkeleton.prevPose[5].y); + if (humanSkeleton.scores[6] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[6].x = (poseWeight * humanSkeleton.pose[6].x + + prevPoseWeight * humanSkeleton.prevPose[6].x); + humanSkeleton.prevPose[6].y = (poseWeight * humanSkeleton.pose[6].y + + prevPoseWeight * humanSkeleton.prevPose[6].y); + if (humanSkeleton.scores[7] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[7].x = (poseWeight * humanSkeleton.pose[7].x + + prevPoseWeight * humanSkeleton.prevPose[7].x); + humanSkeleton.prevPose[7].y = (poseWeight * humanSkeleton.pose[7].y + + prevPoseWeight * humanSkeleton.prevPose[7].y); + } + } + } + + // right leg + if (humanSkeleton.scores[8] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[8].x = (poseWeight * 
humanSkeleton.pose[8].x + + prevPoseWeight * humanSkeleton.prevPose[8].x); + humanSkeleton.prevPose[8].y = (poseWeight * humanSkeleton.pose[8].y + + prevPoseWeight * humanSkeleton.prevPose[8].y); + if (humanSkeleton.scores[9] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[9].x = (poseWeight * humanSkeleton.pose[9].x + + prevPoseWeight * humanSkeleton.prevPose[9].x); + humanSkeleton.prevPose[9].y = (poseWeight * humanSkeleton.pose[9].y + + prevPoseWeight * humanSkeleton.prevPose[9].y); + if (humanSkeleton.scores[10] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[10].x = (poseWeight * humanSkeleton.pose[10].x + + prevPoseWeight * humanSkeleton.prevPose[10].x); + humanSkeleton.prevPose[10].y = (poseWeight * humanSkeleton.pose[10].y + + prevPoseWeight * humanSkeleton.prevPose[10].y); + } + } + } + + // left leg + if (humanSkeleton.scores[11] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[11].x = (poseWeight * humanSkeleton.pose[11].x + + prevPoseWeight * humanSkeleton.prevPose[11].x); + humanSkeleton.prevPose[11].y = (poseWeight * humanSkeleton.pose[11].y + + prevPoseWeight * humanSkeleton.prevPose[11].y); + if (humanSkeleton.scores[12] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[12].x = (poseWeight * humanSkeleton.pose[12].x + + prevPoseWeight * humanSkeleton.prevPose[12].x); + humanSkeleton.prevPose[12].y = (poseWeight * humanSkeleton.pose[12].y + + prevPoseWeight * humanSkeleton.prevPose[12].y); + if (humanSkeleton.scores[13] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[13].x = (poseWeight * humanSkeleton.pose[13].x + + prevPoseWeight * humanSkeleton.prevPose[13].x); + humanSkeleton.prevPose[13].y = (poseWeight * humanSkeleton.pose[13].y + + prevPoseWeight * humanSkeleton.prevPose[13].y); + } + } + } + } + + // + //draw.. 
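+	// Drawing below proceeds limb by limb: each chain (e.g. neck -> shoulder ->
+	// elbow -> wrist) keeps extending only while every joint on it has passed
+	// its per-limb threshold, so one low-confidence joint truncates the limb.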
+ // head - neck + if (humanSkeleton.scores[0] >= thValNeck ) { + cairo_move_to(cr, humanSkeleton.prevPose[0].x, humanSkeleton.prevPose[0].y); + cairo_line_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + } + + // right arm + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[2] >= thValArm) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[2].x, humanSkeleton.prevPose[2].y); + if (humanSkeleton.scores[3] >= thValArm) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[3].x, humanSkeleton.prevPose[3].y); + if (humanSkeleton.scores[4] >= thValArm) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[4].x, humanSkeleton.prevPose[4].y); + } + } + } + cairo_stroke(cr); + + // left arm + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[5] >= thValArm) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[5].x, humanSkeleton.prevPose[5].y); + if (humanSkeleton.scores[6] >= thValArm) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[6].x, humanSkeleton.prevPose[6].y); + if (humanSkeleton.scores[7] >= thValArm) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[7].x, humanSkeleton.prevPose[7].y); + } + } + } + cairo_stroke(cr); + + + // right leg + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[8] >= thValLeg) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[8].x, humanSkeleton.prevPose[8].y); + if (humanSkeleton.scores[9] >= thValLeg) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[9].x, humanSkeleton.prevPose[9].y); + if (humanSkeleton.scores[10] >= thValLeg) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[10].x, humanSkeleton.prevPose[10].y); + } + } + } + cairo_stroke(cr); + + // left leg + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[11] >= thValLeg) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[11].x, humanSkeleton.prevPose[11].y); + if (humanSkeleton.scores[12] >= thValLeg) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[12].x, humanSkeleton.prevPose[12].y); + if (humanSkeleton.scores[13] >= thValLeg) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[13].x, humanSkeleton.prevPose[13].y); + } + } + } + cairo_stroke(cr); +} +*/ +static void +draw_overlay_hand (GstElement * overlay, cairo_t * cr, guint64 timestamp, + guint64 duration, gpointer user_data) +{ + CairoOverlayState *s = (CairoOverlayState *) user_data; + + if (!s->valid) { + printf("not ready draw_overlay"); + return; + } + + cairo_set_source_rgba(cr, 0.1, 0.9, 0.0, 0.7); + cairo_set_line_width(cr, 2.0); + + + if (!humanSkeleton.IsDetected) + return; + + + // thumb - red + cairo_set_source_rgba (cr, 0.9, 0.1, 0.0, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 1 ; k < 5; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // fore - red + cairo_set_source_rgba (cr, 0.9, 0.1, 0.0, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 5 ; k < 9; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // middle 
- grean + cairo_set_source_rgba (cr, 0.1, 0.9, 0.0, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 9 ; k < 13; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // ring - blue + cairo_set_source_rgba (cr, 0.1, 0.0, 0.9, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 13 ; k < 17; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // little - purple + cairo_set_source_rgba (cr, 0.5, 0.0, 0.5, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 17 ; k < 21; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); +} + +static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data) +{ + + switch (GST_MESSAGE_TYPE (msg)) { + + case GST_MESSAGE_EOS: + printf ("End of stream\n"); + break; + + case GST_MESSAGE_ERROR: { + gchar *debug; + GError *error; + + gst_message_parse_error (msg, &error, &debug); + g_free (debug); + + printf ("Error: %s\n", error->message); + g_error_free (error); + + break; + } + default: + break; + } + + return TRUE; +} + +int perform_armnn_human_pose_cpm_configure(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "image"; + char *outputNodeName[1] = {"Convolutional_Pose_Machine/stage_5_out"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_CPM_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_ARMNN); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_GPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, + 1); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_armnn_human_pose_hourglass_configure(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "image"; + char *outputNodeName[1] = {"hourglass_out_3"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_HOURGLASS_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + 
mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_ARMNN); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_GPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, + 1); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AIC(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"}; + + //outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AIC_1_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AIC2(mv_engine_config_h mv_engine_cfg) +{ + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AIC_2_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 
21); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AICLite(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/boundingbox", "mobilenetv2/heatmap"}; + + //outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AICLite_1_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AICLite2(mv_engine_config_h mv_engine_cfg) +{ + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AICLite_2_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 21); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +static void __global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct tizen_surface **tz_surface = NULL; + + if (!data) { + printf("NULL data\n"); + 
return; + } + + tz_surface = (struct tizen_surface **)data; + + if (!interface) { + printf("NULL interface\n"); + return; + } + + if (strcmp(interface, "tizen_surface") == 0) { + printf("binding tizen surface for wayland\n"); + + *tz_surface = wl_registry_bind(registry, name, &tizen_surface_interface, 1); + if (*tz_surface == NULL) + printf("failed to bind\n"); + + printf("done\n"); + } + + return; +} + +static void __global_remove(void *data, struct wl_registry *wl_registry, uint32_t name) +{ + printf("enter\n"); + return; +} + +static const struct wl_registry_listener _wl_registry_listener = { + __global, + __global_remove +}; + +void __parent_id_getter(void *data, struct tizen_resource *tizen_resource, uint32_t id) +{ + if (!data) { + printf("NULL data\n"); + return; + } + + *((unsigned int *)data) = id; + + printf("[CLIENT] got parent_id [%u] from server\n", id); + + return; +} + +static const struct tizen_resource_listener _tz_resource_listener = { + __parent_id_getter +}; + +static void set_overlay(Ecore_Evas *ee) +{ + Ecore_Wl2_Window *window = NULL; + Ecore_Wl2_Display *e_wl2_display = NULL; + + struct wl_display *display = NULL; + struct wl_display *display_wrapper = NULL; + struct wl_surface *surface = NULL; + struct wl_registry *registry = NULL; + struct wl_event_queue *queue = NULL; + struct tizen_surface *tz_surface = NULL; + struct tizen_resource *tz_resource = NULL; + + window = ecore_evas_wayland2_window_get(ee); + if (!window) { + printf("failed to get wayland window\n"); + goto _DONE; + } + + /* set video_has flag to a video application window */ + ecore_wl2_window_video_has(window, EINA_TRUE); + + surface = (struct wl_surface *)ecore_wl2_window_surface_get(window); + if (!surface) { + printf("failed to get wayland surface\n"); + goto _DONE; + } + + e_wl2_display = ecore_wl2_connected_display_get(NULL); + if (!e_wl2_display) { + printf("failed to get ecore wl2 display\n"); + goto _DONE; + } + + display = (struct wl_display *)ecore_wl2_display_get(e_wl2_display); + if (!display) { + printf("failed to get wayland display\n"); + goto _DONE; + } + + display_wrapper = wl_proxy_create_wrapper(display); + if (!display_wrapper) { + printf("failed to create wl display wrapper\n"); + } + + queue = wl_display_create_queue(display); + if (!queue) { + printf("failed to create wl display queue\n"); + goto _DONE; + } + + wl_proxy_set_queue((struct wl_proxy *)display_wrapper, queue); + + registry = wl_display_get_registry(display_wrapper); + if (!registry) { + printf("failed to get wayland registry\n"); + goto _DONE; + } + + wl_registry_add_listener(registry, &_wl_registry_listener, &tz_surface); + + wl_display_dispatch_queue(display, queue); + wl_display_roundtrip_queue(display, queue); + + if (!tz_surface) { + printf("failed to get tizen surface\n"); + goto _DONE; + } + + /* Get parent_id which is unique in a entire systemw. 
*/ + tz_resource = tizen_surface_get_tizen_resource(tz_surface, surface); + if (!tz_resource) { + printf("failed to get tizen resurce\n"); + goto _DONE; + } + + int parent_id = 0; + + tizen_resource_add_listener(tz_resource, &_tz_resource_listener, &parent_id); + + wl_display_roundtrip_queue(display, queue); + + if (parent_id > 0) { + printf("parent id : %u\n", parent_id); + + gst_video_overlay_set_wl_window_wl_surface_id(GST_VIDEO_OVERLAY(sink2), parent_id); + gst_element_set_state (pipeline, GST_STATE_PLAYING); + } else { + printf("failed to get parent id\n"); + } + +_DONE: + if (tz_surface) { + tizen_surface_destroy(tz_surface); + tz_surface = NULL; + } + + if (tz_resource) { + tizen_resource_destroy(tz_resource); + tz_resource = NULL; + } + + if (registry) { + wl_registry_destroy(registry); + registry = NULL; + } + + if (queue) { + wl_event_queue_destroy(queue); + queue = NULL; + } + + if (display_wrapper) { + wl_proxy_wrapper_destroy(display_wrapper); + display_wrapper = NULL; + } +} + +static void win_resize_cb (void *data, Evas *e, Evas_Object *obj, void *event_info) +{ + int w, h; + int wid; + + printf("_canvas_resize_cb\n"); + + Evas * _e = evas_object_evas_get(obj); + Ecore_Evas *ee = ecore_evas_ecore_evas_get(_e); + ecore_evas_geometry_get(ee, NULL, NULL, &w, &h); + + if (!st) { + set_overlay(ee); + int x, y; + elm_win_screen_position_get(obj, &x, &y); + printf("x = %d, y = %d\n", x, y); + } else + st++; +} + +static void cb_new_pad (GstElement *element, GstPad *pad, gpointer data) +{ + gchar *name; + GstElement *other = data; + + name = gst_pad_get_name (pad); + printf ("A new pad %s was created for %s\n", name, gst_element_get_name(element)); + g_free (name); + + printf ("element %s will be linked to %s\n", + gst_element_get_name(element), + gst_element_get_name(dscale)); + gst_element_link(element, dscale); +} + +static int app_create(void *data) +{ + appdata *ad = data; + Evas_Object *win = NULL; + + // GST + g_mutex_init(&pose_mutex); + + signal(SIGINT, int_handler); + + /* initialization */ + gst_init(NULL, NULL); + + /* mediavision configuration*/ + IsGestureMode = false; + if (hp_mv_engine_cfg == NULL) { + mv_create_engine_config(&hp_mv_engine_cfg); + } + + if (hp_mv_engine_cfg2 == NULL) { + mv_create_engine_config(&hp_mv_engine_cfg2); + } + + int err = MEDIA_VISION_ERROR_NONE; + + + if (ad->modelType == MODEL_TYPE_POSE_CPM) { + err = perform_armnn_human_pose_cpm_configure(hp_mv_engine_cfg); + } else if (ad->modelType == MODEL_TYPE_POSE_HOURGLASS) { + err = perform_armnn_human_pose_hourglass_configure(hp_mv_engine_cfg); + } else if (ad->modelType == MODEL_TYPE_POSE_HAND_AIC) { + outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + err = perform_tflite_hand_detection_AIC(hp_mv_engine_cfg); + + err = perform_tflite_hand_detection_AIC2(hp_mv_engine_cfg2); + } else { + outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + err = perform_tflite_hand_detection_AICLite(hp_mv_engine_cfg); + + err = perform_tflite_hand_detection_AICLite2(hp_mv_engine_cfg2); + } + + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Error on perform_armnn_human_pose_configure"); + } + + printf("configuration done\n"); + + printf("loading pose model: "); + mv_inference_create(&hp_mv_infer); + + mv_inference_configure(hp_mv_infer, hp_mv_engine_cfg); + + clock_t start = clock(); + mv_inference_prepare(hp_mv_infer); + clock_t end = clock(); + printf("time: %2.3f\n", (double)(end - start)/CLOCKS_PER_SEC); + + + mv_inference_create(&hp_mv_infer2); + + mv_inference_configure(hp_mv_infer2, 
hp_mv_engine_cfg2); + + start = clock(); + mv_inference_prepare(hp_mv_infer2); + end = clock(); + printf("time: %2.3f\n", (double)(end - start)/CLOCKS_PER_SEC); + + /* mediavision source */ + mv_create_source(&mv_src_p); + mv_create_source(&mv_src_p2); + + pipeline = gst_pipeline_new("app"); + + overlay_state = g_new0 (CairoOverlayState, 1); + + /* create gstreamer elements */ + if (!ad->filename) { + source = gst_element_factory_make("v4l2src", "src"); + filter = gst_element_factory_make("capsfilter", "filter"); + } else { + source = gst_element_factory_make("filesrc", "src"); + + decodebin = gst_element_factory_make("decodebin", "dbin"); + dscale = gst_element_factory_make("videoscale", "dscale"); + dconv = gst_element_factory_make("videoconvert", "dconv"); + dsfilter = gst_element_factory_make("capsfilter", "dsfilter"); + dcfilter = gst_element_factory_make("capsfilter", "dcfilter"); + } + + if (ad->filename2) { + tee2 = gst_element_factory_make("tee", "tee2"); + enc = gst_element_factory_make("avenc_mpeg4", "enc"); + muxmp4 = gst_element_factory_make("mp4mux", "muxmp4"); + fsink2 = gst_element_factory_make("filesink", "fsink2"); + queue4 = gst_element_factory_make("queue", "queue4"); + queue5 = gst_element_factory_make("queue", "queue5"); + encconv = gst_element_factory_make("videoconvert", "encconv"); + } + + + tee = gst_element_factory_make("tee", "tee"); + queue1 = gst_element_factory_make("queue", "queue1"); + queue2 = gst_element_factory_make("queue", "queue2"); + + if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) { + queue3 = gst_element_factory_make("queue", "queue3"); + } + + // queue1 - videoscale - capsfilter -viedoeconvert - capsfilter - videorate - capsfilter -fakesink + vscale = gst_element_factory_make("videoscale", "scale"); + vsfilter = gst_element_factory_make("capsfilter", "vsfilter"); + vconv = gst_element_factory_make("videoconvert", "convert"); + vcfilter = gst_element_factory_make("capsfilter", "vcfilter"); + vrate = gst_element_factory_make("videorate", "rate"); + vrfilter = gst_element_factory_make("capsfilter", "vrfilter"); + vrsink = gst_element_factory_make("fakesink", "vrsink"); + + // queue2 - videoconvert - cairooveray - tizenwlsink + oconv = gst_element_factory_make("videoconvert", "oconv"); + coverlay = gst_element_factory_make("cairooverlay", "coverlay"); + sink = gst_element_factory_make("fpsdisplaysink", "vsink"); + sink2 = gst_element_factory_make("tizenwlsink", "vsink2"); + + // after detection, crop using video crop + // queue3 - videocrop - videoscale -capsfilter - videoconvert - capsfilter -fakesink + + if (0/*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) { + vcrop = gst_element_factory_make("videocrop", "crop"); + vcrscale = gst_element_factory_make("videoscale", "crscale"); + vcrsfilter = gst_element_factory_make("capsfilter", "vcrsfilter"); + vcrsconv = gst_element_factory_make("videoconvert", "vcrsconvert"); + vcrscfilter = gst_element_factory_make("capsfilter", "vcrscfilter"); + vcrssink = gst_element_factory_make("fakesink", "vcrssink"); + } + + if (!pipeline || !source || + !tee || !queue1 || !vscale || !vsfilter || !vconv || !vcfilter || + !vrate || !vrfilter || !vrsink || + !queue2 || !oconv || !coverlay || !sink || !sink2) { + printf(TEXT_RED "One element(queue1 or queue2) might be not created. 
Exiting.\n" TEXT_RESET); + return -1; + } + + if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC && (!pipeline || !queue3 + || !vcrop || !vcrscale || !vcrsfilter || !vcrsconv || !vcrscfilter || !vcrssink*/) { + printf(TEXT_RED "One element(queue3) might be not created. Exiting.\n" TEXT_RESET); + return -1; + } + + if (!ad->filename) { + if (!filter) { + printf(TEXT_RED "One element might be not created. Existing.\n" TEXT_RESET); + return -1; + } + g_object_set(G_OBJECT(filter), "caps", gst_caps_from_string("video/x-raw, format=YUY2, width=640, height=480"), NULL); + } else { + if (!decodebin || !dscale || !dconv) { + printf(TEXT_RED "One element might be not created. Exiting.\n" TEXT_RESET); + return -1; + } + g_object_set(G_OBJECT(dsfilter), "caps", gst_caps_from_string("video/x-raw, width=640, height=480"), NULL); + g_object_set(G_OBJECT(dcfilter), "caps", gst_caps_from_string("video/x-raw, format=YUY2, width=640, height=480"), NULL); + g_signal_connect (decodebin, "pad-added", G_CALLBACK (cb_new_pad), NULL); + } + + if (ad->filename2) { + if (!tee2 || !enc || !muxmp4 || !queue4 || !queue5 || !fsink2 || !encconv) { + printf(TEXT_RED "One element might be not created. Exiting.\n" TEXT_RESET); + printf("%p, %p, %p, %p, %p, %p, %p\n", tee2, enc, muxmp4, queue4, queue5, fsink2, encconv); + return -1; + } + g_object_set(G_OBJECT(fsink2), "location", ad->filename2, NULL); + + } + + /* set up the pipeline */ + //g_signal_connect (coverlay, "draw", G_CALLBACK (draw_overlay), overlay_state); + g_signal_connect (coverlay, "draw", G_CALLBACK (draw_overlay_hand), overlay_state); + + g_signal_connect (coverlay, "caps-changed", G_CALLBACK (prepare_overlay), overlay_state); + + if (!ad->filename) { + g_object_set(G_OBJECT(source), "device", "/dev/video8", NULL); // 252 + } else { + g_object_set(G_OBJECT(source), "location", ad->filename, NULL); + g_object_set(G_OBJECT(source), "num-buffers", ad->numbuffers, NULL); + } + + g_object_set(G_OBJECT(sink2), "use-tbm", FALSE, NULL); + g_object_set(G_OBJECT(sink2), "sync", FALSE, NULL); + g_object_set(G_OBJECT(sink), "video-sink", sink2, NULL); + g_object_set(G_OBJECT(sink), "sync", FALSE, NULL); + + if (ad->modelType == MODEL_TYPE_POSE_HAND_AIC || + ad->modelType == MODEL_TYPE_POSE_HAND_AICLite) { + g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=224, height=224"), NULL); + poseCropSize = 224; + } else { + g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=192, height=192"), NULL); + poseCropSize = 192; + } + + g_object_set(G_OBJECT(vcfilter), "caps", gst_caps_from_string("video/x-raw, format=RGB"), NULL); + g_object_set(G_OBJECT(vrfilter), "caps", gst_caps_from_string("video/x-raw, framerate=15/1"), NULL); + + //g_object_set(G_OBJECT(vrate), "drop-only", TRUE, NULL); + + //g_object_set(G_OBJECT(queue2), "leaky", 2, NULL); +#if 0 + g_object_set(G_OBJECT(queue3), "max-size-buffers", 0, NULL); + g_object_set(G_OBJECT(queue3), "max-size-time", 0, NULL); + g_object_set(G_OBJECT(queue3), "max-size-bytes", 0, NULL); +#endif + //g_object_set(G_OBJECT(queue3), "leaky", 2, NULL); + + // here to be continue + printf("vrsink signal-handoffs\n"); + g_object_set(G_OBJECT(vrsink), "signal-handoffs", TRUE, NULL); + handler_p = g_signal_connect (vrsink, "handoff", G_CALLBACK(_pose_est_handoff), outputTensorData); + g_object_set(G_OBJECT(vrsink), "sync", FALSE, NULL); + + + g_object_set(G_OBJECT(vcrssink), "sync", FALSE, NULL); + + + /* add a message handler */ + bus = gst_pipeline_get_bus 
(GST_PIPELINE(pipeline));
+	bus_watch_id = gst_bus_add_watch(bus, bus_call, NULL);
+	gst_object_unref(bus);
+
+	/* add elements into the pipeline */
+	gst_bin_add_many(GST_BIN(pipeline),
+			source,
+			tee, queue1, vscale, vsfilter, vconv, vcfilter,
+			vrate, vrfilter, vrsink,
+			queue2, oconv, coverlay, sink,
+			NULL);
+
+	if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) {
+		gst_bin_add_many(GST_BIN(pipeline),
+			queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink,
+			NULL);
+		gst_element_link_many (tee, queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL);
+	}
+
+	/* link elements */
+	if (!ad->filename) {
+		gst_bin_add(GST_BIN(pipeline), filter);
+		gst_element_link_many(source, filter, tee, NULL);
+	} else {
+		gst_bin_add_many(GST_BIN(pipeline), decodebin, dscale, dconv, dsfilter, dcfilter, NULL);
+		gst_element_link_many(source, decodebin, NULL);
+		gst_element_link_many(dscale, dsfilter, dconv, dcfilter, tee, NULL);
+	}
+
+
+	if (!ad->filename2) {
+		// display
+		gst_element_link_many (tee, queue2, oconv, coverlay, /*toverlay,*/ sink, NULL);
+		// pose
+		gst_element_link_many (tee, queue1, vrate, vrfilter, vconv, vcfilter, vscale, vsfilter, vrsink, NULL);
+	} else {
+
+		/* gst_bin_add_many() is a varargs call and must be NULL-terminated */
+		gst_bin_add_many(GST_BIN(pipeline), tee2, enc, muxmp4, fsink2, queue4, queue5, encconv, NULL);
+
+		// display
+		gst_element_link_many (tee, queue2, oconv, coverlay, tee2, NULL);
+		gst_element_link_many (tee2, queue4, sink, NULL);
+		gst_element_link_many (tee2, queue5, encconv, enc, muxmp4, fsink2, NULL);
+
+		// pose
+		gst_element_link_many (tee, queue1, vrate, vrfilter, vconv, vcfilter, vscale, vsfilter, vrsink, NULL);
+	}
+
+	/* set the pipeline state to "playing" state */
+	//gst_element_set_state(pipeline, GST_STATE_PLAYING);
+
+	/* loop */
+	humanSkeleton.IsDetected = false;
+	humanSkeleton.isPrevPose = false;
+	printf(TEXT_GREEN "Running.....\n" TEXT_RESET);
+	// GST_END
+#if 0
+	/* use gl backend */
+	elm_config_accel_preference_set("opengl");
+
+	/* create window */
+	//win = elm_win_add(NULL, PACKAGE, ELM_WIN_SPLASH );
+	win = elm_win_add(NULL, PACKAGE, ELM_WIN_BASIC);
+	if (win) {
+		elm_win_title_set(win, PACKAGE);
+		elm_win_borderless_set(win, EINA_TRUE);
+		elm_win_autodel_set(win, EINA_TRUE);
+		elm_win_alpha_set(win, EINA_FALSE);
+		evas_object_show(win);
+	}
+	elm_win_layer_set(win, 9);
+	elm_win_prop_focus_skip_set(win, EINA_TRUE);
+
+	ad->win = win;
+	g_win_id = win;
+	selected_win_id = g_win_id;
+
+	Evas_Object *bg = elm_bg_add(win);
+	elm_win_resize_object_add(win, bg);
+	evas_object_size_hint_min_set(bg, WIDTH, HEIGHT);
+	evas_object_size_hint_max_set(bg, WIDTH, HEIGHT);
+	evas_object_show(bg);
+
+	elm_win_activate(win);
+
+
+	evas_object_event_callback_add(win, EVAS_CALLBACK_RESIZE, win_resize_cb, NULL);
+#else
+	gst_element_set_state (pipeline, GST_STATE_PLAYING);
+#endif
+	return 0;
+}
+
+
+static int app_terminate(void *data)
+{
+	appdata *ad = data;
+
+	/* out of loop */
+	printf(TEXT_GREEN "Stopping.....\n" TEXT_RESET);
+	gst_element_set_state(pipeline, GST_STATE_NULL);
+
+	printf(TEXT_GREEN "pipe unref.....\n" TEXT_RESET);
+	gst_object_unref(GST_OBJECT(pipeline));
+
+	printf(TEXT_GREEN "remove bus_watch id.....\n" TEXT_RESET);
+	g_source_remove(bus_watch_id);
+
+	if (overlay_state != NULL) {
+		printf(TEXT_GREEN "g_free overlay.....\n" TEXT_RESET);
+		g_free(overlay_state);
+	}
+
+	g_mutex_clear(&pose_mutex);
+	printf(TEXT_GREEN "done.....\n" TEXT_RESET);
+
+	if (g_win_id) {
+		evas_object_del(g_win_id);
+		g_win_id = NULL;
+	}
+	ad->win = NULL;
selected_win_id = NULL;
+
+	return 0;
+}
+
+struct appcore_ops ops = {
+	.create = app_create,
+	.terminate = app_terminate,
+};
+
+int main (int argc, char *argv[])
+{
+	memset(&ad, 0x0, sizeof(appdata));
+	ops.data = &ad;
+
+	if (argc >= 6) {
+		ad.filename = g_strdup(argv[5]);
+		printf("launch with file source (%s)\n", ad.filename);
+		if (argc > 6) {
+			ad.filename2 = g_strdup(argv[6]);
+			ad.numbuffers = -1;
+			printf("recording output to (%s)\n", ad.filename2);
+		}
+		if (argc > 7) {
+			ad.numbuffers = atoi(argv[7]);
+		}
+	} else {
+		printf("launch with camera source\n");
+	}
+
+	if (argc < 2) {
+		printf("usage: mv_stream_infer model [NeckThresVal, ArmThresVal, LegThresVal, [filename]]\n");
+		printf("model: 0(CPM), 1(HOURGLASS), 2(AIC Hand), 3(AIC Lite Hand)\n");
+		return -1;
+	}
+
+	ad.modelType = atoi(argv[1]);
+	if (ad.modelType < 0 || ad.modelType > 3) {
+		printf("unsupported model type\n");
+		return -1;
+	}
+
+	if (ad.modelType != MODEL_TYPE_POSE_HAND_AIC &&
+		ad.modelType != MODEL_TYPE_POSE_HAND_AICLite) {
+		thValNeck = (float)atoi(argv[2])/100.f;
+		thValArm = (float)atoi(argv[3])/100.f;
+		thValLeg = (float)atoi(argv[4])/100.f;
+
+		poseRoi.point.x = 50;
+		poseRoi.point.y = 0;
+		poseRoi.width = 100;
+		poseRoi.height = 192;
+	} else {
+		if (argc > 2) {
+			ad.filename2 = g_strdup(argv[2]);
+		}
+		poseRoi.point.x = 0;
+		poseRoi.point.y = 0;
+		poseRoi.width = 0;
+		poseRoi.height = 0;
+	}
+
+	return appcore_efl_main(PACKAGE, &argc, &argv, &ops);
+}
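
For reference, the call sequence the new tensor-buffer path expects from a client boils down to the sketch below. This is a minimal illustration, not part of the patch itself: gesture_infer stands for an inference handle that has already been created, configured with the second-stage gesture model (as in perform_tflite_hand_detection_AIC2() above) and prepared, run_second_stage() is a hypothetical helper name, and the 56x56x21 FLOAT32 layout mirrors the heatmap tensor this testsuite hands from the first model to the second.

/* Hypothetical client-side sketch (not part of this patch): push the filtered
 * heatmaps produced by the first model into the second-stage gesture model
 * through the new tensor-buffer APIs. */
#include <stdio.h>
#include <mv_common.h>
#include <mv_inference.h>

static void pose_cb(mv_source_h source, const int number_of_pose_estimation,
                    const mv_point_s *locations, void *user_data)
{
	for (int n = 0; n < number_of_pose_estimation; ++n)
		printf("landmark %d: (%d, %d)\n", n, locations[n].x, locations[n].y);
}

/* 'gesture_infer' is assumed to be configured and prepared already;
 * 'heatmaps' is the raw 56x56x21 float output of the first model. */
int run_second_stage(mv_inference_h gesture_infer, float *heatmaps)
{
	mv_source_h src = NULL;
	int err = mv_create_source(&src);
	if (err != MEDIA_VISION_ERROR_NONE)
		return err;

	/* Wrap the raw tensor; width/height/channel/dimension follow the
	 * 56x56x21, dimension=3 layout used by stream_infer.c. */
	err = mv_source_fill_by_tensor_buffer(src, heatmaps,
					MV_INFERENCE_DATA_FLOAT32,
					56 * 56 * 21 * sizeof(float),
					56, 56, 21, 3);

	/* The scale factors map the model's coordinate space back to the
	 * 640x480 camera frame, as in _hand_detected_cb() above. */
	if (err == MEDIA_VISION_ERROR_NONE)
		err = mv_inference_pose_estimation_detect(src, gesture_infer, NULL,
					640.f, 480.f, pose_cb, NULL);

	mv_destroy_source(src);
	return err;
}

On the consuming side, mv_source_is_tensor() and mv_source_get_tensor_buffer() let the inference layer tell such a tensor-backed source apart from an ordinary image-backed one, which is what allows both fill paths to share the same mv_source_h handle.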