[testsuite/stream_infer] Support hand gesture model and add stream_infer
author     Tae-Young Chung <ty83.chung@samsung.com>
           Fri, 12 Jun 2020 01:24:49 +0000 (10:24 +0900)
committer  Tae-Young Chung <ty83.chung@samsung.com>
           Fri, 12 Jun 2020 01:24:52 +0000 (10:24 +0900)
The hand gesture model consists of two models. The first takes an image as
input and outputs a hand segmentation map and heatmaps. The second takes the
filtered heatmaps, as a raw tensor buffer, as input and outputs key-point
coordinates and a gesture.

To support this, the following APIs are added:
mv_source_fill_by_tensor_buffer()
mv_inference_hand_detect()
mv_inference_pose_estimation_detect()

To test the models, the stream_infer testsuite is added.
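
For reference, a rough usage sketch of the two-stage flow (not part of this
patch; handle creation, engine configuration and error handling are omitted,
and the 56x56x21 float heatmap shape and the dimension value of 4 are
assumptions taken from the testsuite):

    #include <mv_common.h>
    #include <mv_inference.h>

    static float heatmaps[56 * 56 * 21]; /* filtered heatmaps from model 1 */

    static void on_pose(mv_source_h source, const int num,
                        const mv_point_s *locations, void *user_data)
    {
        /* locations[] holds the refined hand key points */
    }

    static void on_hand(mv_source_h source, const int num,
                        const float *confidences,
                        const mv_rectangle_s *locations, void *user_data)
    {
        /* with a non-NULL user_data, the filtered heatmaps have been
         * copied into it instead of reporting hand locations */
    }

    static void run_two_stage(mv_source_h image_src, mv_inference_h hand_infer,
                              mv_source_h tensor_src, mv_inference_h pose_infer,
                              float scale_w, float scale_h)
    {
        /* model 1: image in, hand segmentation + heatmaps out */
        mv_inference_hand_detect(image_src, hand_infer, on_hand, heatmaps);

        /* model 2: filtered heatmaps (raw tensor buffer) in,
         * key-point coordinates + gesture out */
        mv_source_fill_by_tensor_buffer(tensor_src, heatmaps,
                                        MV_INFERENCE_DATA_FLOAT32,
                                        sizeof(float) * 56 * 56 * 21,
                                        56, 56, 21, 4 /* dimension */);
        mv_inference_pose_estimation_detect(tensor_src, pose_infer, NULL,
                                            scale_w, scale_h, on_pose, NULL);
    }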

Change-Id: Id150dd893c229e2a207e099b46f8d53b029e291f
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
21 files changed:
include/mv_common.h
include/mv_inference.h
include/mv_inference_type.h
mv_common/include/MediaSource.h
mv_common/include/mv_common_c.h
mv_common/src/MediaSource.cpp
mv_common/src/mv_common.c
mv_common/src/mv_common_c.cpp
mv_inference/inference/CMakeLists.txt
mv_inference/inference/include/Inference.h
mv_inference/inference/include/mv_inference_open.h
mv_inference/inference/src/Inference.cpp
mv_inference/inference/src/mv_inference.c
mv_inference/inference/src/mv_inference_open.cpp
packaging/capi-media-vision.spec
src/mv_common.c
src/mv_inference.c
test/testsuites/CMakeLists.txt
test/testsuites/inference/inference_test_suite.c
test/testsuites/stream_infer/CMakeLists.txt [new file with mode: 0644]
test/testsuites/stream_infer/stream_infer.c [new file with mode: 0644]

index dc2faf88dc9ec4495ff319a9050c2b8b55f7ddda..f1244c7152967dbb5deeeff5b683b91a35b1a3b9 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <media_packet.h>
 
+#include <mv_inference_type.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@@ -245,6 +247,16 @@ int mv_source_fill_by_buffer(
                unsigned int image_height,
                mv_colorspace_e image_colorspace);
 
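+/**
+ * @brief Fills the media source with a raw tensor buffer.
+ *
+ * @remarks Unlike mv_source_fill_by_buffer(), @a data_buffer is interpreted
+ *          as a tensor of the given data type and shape
+ *          (@a width x @a height x @a channel, with @a dimension dimensions)
+ *          rather than as an image.
+ */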
+int mv_source_fill_by_tensor_buffer(
+               mv_source_h source,
+               void *data_buffer,
+               mv_inference_data_type_e type, // a common data type may be needed here and converted to the inference data type internally
+               unsigned int buffer_size,
+               unsigned int width,
+               unsigned int height,
+               unsigned int channel,
+               unsigned int dimension);
+
 /**
  * @brief Clears the buffer of the media source.
  *
@@ -285,6 +297,10 @@ int mv_source_get_buffer(
                unsigned char **data_buffer,
                unsigned int *buffer_size);
 
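+/**
+ * @brief Gets the tensor data buffer of the media source.
+ */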
+int mv_source_get_tensor_buffer(
+               mv_source_h source,
+               void **data_buffer,
+               unsigned int *buffer_size);
 /**
  * @brief Gets height of the media source.
  *
@@ -323,6 +339,14 @@ int mv_source_get_width(
                mv_source_h source,
                unsigned int *image_width);
 
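+/**
+ * @brief Gets the channel count of a tensor media source.
+ */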
+int mv_source_get_channel(
+               mv_source_h source,
+               unsigned int *channel);
+
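+/**
+ * @brief Gets the dimension count of a tensor media source.
+ */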
+int mv_source_get_dimension(
+               mv_source_h source,
+               unsigned int *dims);
+
 /**
  * @brief Gets colorspace of the media source.
  *
@@ -342,6 +366,8 @@ int mv_source_get_colorspace(
                mv_source_h source,
                mv_colorspace_e *image_colorspace);
 
+
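+/**
+ * @brief Checks whether the media source holds a tensor buffer.
+ *
+ * @return @c true if the source was filled by mv_source_fill_by_tensor_buffer(),
+ *         otherwise @c false
+ */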
+bool mv_source_is_tensor(mv_source_h source);
 /**
  * @brief Creates the handle to the configuration of engine.
  *
index 5af4193091db287c131199b547c2be0e73483534..2b60668573b89c0ac9833d99f11cdd0afcde3f58 100644 (file)
@@ -724,6 +724,8 @@ int mv_inference_pose_estimation_detect(
        mv_source_h source,
        mv_inference_h infer,
        mv_rectangle_s *roi,
+       float scale_width,
+       float scale_height,
        mv_inference_pose_estimation_detected_cb detected_cb,
        void *user_data);
 
index dbe0a85ef612d43ad50b235d8e71e734a483f37b..41a9e9dea1955de1f6f4e61571791500d9062163 100644 (file)
@@ -51,7 +51,7 @@ typedef enum {
  * @deprecated Deprecated since 6.0. Use #mv_inference_target_device_e instead.
  * @brief Enumeration for inference target.
  *
- * @since_tizem 5.5
+ * @since_tizen 5.5
  *
  */
 typedef enum {
index 0bd5e030213fe43b2487d82c6a23126faf0bf395..c67f4b789f8d93bc07df2263456fa62d3124f2a4 100644 (file)
@@ -119,13 +119,15 @@ public:
        bool fill(const unsigned char *buffer, unsigned int bufferSize,
                        unsigned int width, unsigned int height, size_t offset);
 
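+       /**
+        * @brief Fills the MediaSource with a raw tensor buffer of the given
+        *        data type and shape (width x height x channel, dimension count).
+        */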
+       bool fill(void *buffer, mv_inference_data_type_e type, unsigned int bufferSize,
+                       unsigned int width, unsigned int height, unsigned int channel, unsigned int dimension);
        /**
         * @brief Gets data buffer of the MediaSource.
         *
         * @since_tizen @if MOBILE 2.4 @else 3.0 @endif
         * @return Pointer to the data buffer.
         */
-       unsigned char *getBuffer(void) const;
+       void *getBuffer(void) const;
 
        /**
         * @brief Gets buffer size of the MediaSource.
@@ -151,6 +153,9 @@ public:
         */
        unsigned int getHeight(void) const;
 
+       unsigned int getChannel(void) const;
+
+       unsigned int getDimension(void) const;
        /**
         * @brief Gets image colorspace of the MediaSource.
         *
@@ -159,8 +164,12 @@ public:
         */
        mv_colorspace_e getColorspace(void) const;
 
+       mv_inference_data_type_e getType(void) const;
+
+       bool getIsTensor(void) const;
+
 private:
-       unsigned char *m_pBuffer;        /**< The data buffer */
+       void *m_pBuffer;        /**< The data buffer */
 
        unsigned int m_bufferSize;       /**< The buffer size */
 
@@ -169,6 +178,12 @@ private:
        unsigned int m_height;           /**< The image height */
 
        mv_colorspace_e m_colorspace;    /**< The image colorspace */
+
+       unsigned int m_dim;
+       unsigned int m_ch;
+       bool m_isTensor;
+       mv_inference_data_type_e m_type;
+
 };
 
 } /* Common */
index 234fc52992bbde127262425d0635cdf82125c5d9..539566c18781f78ae83213ab5dafc92aa3671943 100644 (file)
@@ -108,6 +108,16 @@ int mv_source_fill_by_buffer_c(
                unsigned int image_height,
                mv_colorspace_e image_colorspace);
 
+int mv_source_fill_by_tensor_buffer_c(
+               mv_source_h source,
+               void *data_buffer,
+               mv_inference_data_type_e type, // a common data type may be needed here and converted to the inference data type internally
+               unsigned int buffer_size,
+               unsigned int width,
+               unsigned int height,
+               unsigned int channel,
+               unsigned int dimension);
+
 /**
  * @brief Clears the buffer of the media source.
  *
@@ -146,6 +156,11 @@ int mv_source_get_buffer_c(
                unsigned char **data_buffer,
                unsigned int *buffer_size);
 
+int mv_source_get_tensor_buffer_c(
+               mv_source_h source,
+               void **buffer,
+               unsigned int *size);
+
 /**
  * @brief Gets height of the media source.
  *
@@ -182,6 +197,14 @@ int mv_source_get_width_c(
                mv_source_h source,
                unsigned int *image_width);
 
+int mv_source_get_channel_c(
+               mv_source_h source,
+               unsigned int *channel);
+
+int mv_source_get_dimension_c(
+               mv_source_h source,
+               unsigned int *dims);
 /**
  * @brief Gets colorspace of the media source.
  *
@@ -200,6 +223,9 @@ int mv_source_get_colorspace_c(
                mv_source_h source,
                mv_colorspace_e *image_colorspace);
 
+bool mv_source_is_tensor_c(mv_source_h source);
+
 /**
  * @brief Creates the handle to the configuration of engine.
  *
index 50f956c7b37a3765bd54428581e8ccb97a11a1c2..cb5fc874b3cf397c4c882594358c1d7c53f5cc1a 100644 (file)
@@ -29,7 +29,11 @@ MediaSource::MediaSource() :
        m_bufferSize(0),
        m_width(0),
        m_height(0),
-       m_colorspace(MEDIA_VISION_COLORSPACE_INVALID)
+       m_colorspace(MEDIA_VISION_COLORSPACE_INVALID),
+       m_dim(0),
+       m_ch(0),
+       m_isTensor(false),
+       m_type(MV_INFERENCE_DATA_UINT8)
 {
 }
 
@@ -66,6 +70,7 @@ bool MediaSource::alloc(unsigned int bufferSize,
                        "the media source %p", colorspace, this);
        m_colorspace = colorspace;
 
+       m_type = MV_INFERENCE_DATA_UINT8;
        return true;
 }
 
@@ -73,7 +78,10 @@ void MediaSource::clear(void)
 {
        if (m_pBuffer != NULL) {
                LOGD("Delete internal buffer for media source %p", this);
-               delete[] m_pBuffer;
+               if (m_type == MV_INFERENCE_DATA_FLOAT32)
+                       delete[] static_cast<float*>(m_pBuffer);
+               else
+                       delete[] static_cast<unsigned char*>(m_pBuffer);
        }
        LOGD("Set defaults for media source %p : buffer = NULL; "
                        "bufferSize = 0; width = 0; height = 0; "
@@ -83,6 +91,10 @@ void MediaSource::clear(void)
        m_width = 0;
        m_height = 0;
        m_colorspace = MEDIA_VISION_COLORSPACE_INVALID;
+       m_ch = 0;
+       m_dim = 0;
+       m_isTensor = false;
+       m_type = MV_INFERENCE_DATA_UINT8;
 }
 
 bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize,
@@ -121,6 +133,7 @@ bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize,
        LOGD("Assign new colorspace (%i) of the internal buffer image for "
                        "the media source %p", colorspace, this);
        m_colorspace = colorspace;
+       m_type = MV_INFERENCE_DATA_UINT8;
 
        return true;
 }
@@ -144,18 +157,74 @@ bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize,
 
        LOGD("Copy data from external buffer (%p) to the internal buffer (%p + %zd) of "
                        "media source %p", buffer, m_pBuffer, offset, this);
-       std::memcpy(m_pBuffer + offset, buffer, bufferSize);
+       std::memcpy(static_cast<unsigned char*>(m_pBuffer) + offset, buffer, bufferSize);
 
        LOGD("size is %ui x %ui [%ui] on buffer(%p).", width, height, bufferSize, this);
 
+       m_type = MV_INFERENCE_DATA_UINT8;
+
+       return true;
+}
+
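+// Tensor variant of fill(): stores the raw buffer together with its data
+// type and shape, and marks the source as a tensor.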
+bool MediaSource::fill(void *buffer, mv_inference_data_type_e type, unsigned int bufferSize,
+                       unsigned int width, unsigned int height, unsigned int channel, unsigned int dimension)
+{
+       if (bufferSize == 0 || buffer == NULL)
+               return false;
+
+       LOGD("Call clear() first for media source %p", this);
+       clear();
+
+       LOGD("Allocate memory [%i] for buffer in media source %p", bufferSize, this);
+       LOGD("Assign new size (%ui x %ui) of the internal buffer image for "
+                       "the media source %p", width, height, this);
+       LOGD("Assign new buffer with type (%i) of the internal buffer for "
+               "the source %p", type, this);
+       if (type == MV_INFERENCE_DATA_FLOAT32) {
+               m_pBuffer = new (std::nothrow)float[bufferSize];
+       } else if (type == MV_INFERENCE_DATA_UINT8) {
+               m_pBuffer = new (std::nothrow)unsigned char[bufferSize];
+       }
+
+       if (m_pBuffer == NULL) {
+               LOGE("Memory allocating for buffer in media source %p failed!", this);
+               return false;
+       }
+
+       LOGD("Copy data from external buffer (%p) to the internal buffer (%p) of "
+                       "media source %p", buffer, m_pBuffer, this);
+       std::memcpy(m_pBuffer, buffer, bufferSize);
+
+       LOGD("Assign new size of the internal buffer of media source %p. "
+                       "New size is %ui.", this, bufferSize);
+       m_bufferSize = bufferSize;
+
+       LOGD("Assign new size (%ui x %ui x %ui) of the internal buffer image for "
+                       "the media source %p", width, height, channel, this);
+       m_width = width;
+       m_height = height;
+       m_ch = channel;
+       m_dim = dimension;
+       m_isTensor = true;
+       m_type = type;
+
+       LOGD("Assign new colorspace (%i) of the internal buffer image for "
+                       "the media source %p", MEDIA_VISION_COLORSPACE_INVALID, this);
+       m_colorspace = MEDIA_VISION_COLORSPACE_INVALID;
+
        return true;
 }
 
-unsigned char *MediaSource::getBuffer(void) const
+void *MediaSource::getBuffer(void) const
 {
        return m_pBuffer;
 }
 
+mv_inference_data_type_e MediaSource::getType(void) const
+{
+       return m_type;
+}
+
 unsigned int MediaSource::getBufferSize(void) const
 {
        return m_bufferSize;
@@ -171,10 +240,24 @@ unsigned int MediaSource::getHeight(void) const
        return m_height;
 }
 
+unsigned int MediaSource::getChannel(void) const
+{
+       return m_ch;
+}
+
+unsigned int MediaSource::getDimension(void) const
+{
+       return m_dim;
+}
+
 mv_colorspace_e MediaSource::getColorspace(void) const
 {
        return m_colorspace;
 }
 
+bool MediaSource::getIsTensor(void) const
+{
+       return m_isTensor;
+}
 } /* Common */
 } /* MediaVision */
index c01536d0dae6f9274e26d0bd795da9e7c5fa81b3..283a19defd876b12efebc390c3cb51042f0e81cb 100644 (file)
@@ -80,6 +80,29 @@ int mv_source_fill_by_buffer(
        return ret;
 }
 
+int mv_source_fill_by_tensor_buffer(
+               mv_source_h source,
+               void *data_buffer,
+               mv_inference_data_type_e type,
+               unsigned int buffer_size,
+               unsigned int width,
+               unsigned int height,
+               unsigned int channel,
+               unsigned int dimension)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(data_buffer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_fill_by_tensor_buffer_c(
+               source, data_buffer, type, buffer_size, width, height,
+               channel, dimension);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_source_clear(
                mv_source_h source)
 {
@@ -110,6 +133,23 @@ int mv_source_get_buffer(
        return ret;
 }
 
+int mv_source_get_tensor_buffer(
+               mv_source_h source,
+               void **data_buffer,
+               unsigned int *buffer_size)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(data_buffer);
+       MEDIA_VISION_NULL_ARG_CHECK(buffer_size);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_get_tensor_buffer_c(source, data_buffer, buffer_size);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_source_get_height(
                mv_source_h source,
                unsigned int *image_height)
@@ -140,6 +180,36 @@ int mv_source_get_width(
        return ret;
 }
 
+int mv_source_get_channel(
+               mv_source_h source,
+               unsigned int *channel)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(channel);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_get_channel_c(source, channel);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_source_get_dimension(
+               mv_source_h source,
+               unsigned int *dims)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(dims);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_get_dimension_c(source, dims);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_source_get_colorspace(
                mv_source_h source,
                mv_colorspace_e *image_colorspace)
@@ -155,6 +225,19 @@ int mv_source_get_colorspace(
        return ret;
 }
 
+bool mv_source_is_tensor(mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       bool ret = mv_source_is_tensor_c(source);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_create_engine_config(
                mv_engine_config_h *engine_cfg)
 {
index 28ba8382d452cbd45ad0e3fc590cbc27a6f4f6c2..50b9d86021b5e3e59e6d5b03db5119799a67bdf4 100644 (file)
@@ -232,6 +232,34 @@ int mv_source_fill_by_buffer_c(
        return MEDIA_VISION_ERROR_NONE;
 }
 
+int mv_source_fill_by_tensor_buffer_c(
+               mv_source_h source,
+               void *data_buffer,
+               mv_inference_data_type_e type,
+               unsigned int buffer_size,
+               unsigned int width,
+               unsigned int height,
+               unsigned int channel,
+               unsigned int dimension)
+{
+       if (!source || buffer_size == 0 || data_buffer == NULL) {
+               LOGE("Media source can't be filled by tensor buffer because "
+                               "one of the source or data_buffer is NULL or buffer_size = 0. "
+                               "source = %p; data_buffer = %p; buffer_size = %u",
+                               source, data_buffer, buffer_size);
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       if (!(static_cast<MediaVision::Common::MediaSource*>(source))->fill(data_buffer,
+                       type, buffer_size, width, height, channel, dimension)) {
+               LOGE("mv_source_h filling from tensor buffer failed");
+               return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+       }
+
+       LOGD("Media source has been filled from buffer");
+       return MEDIA_VISION_ERROR_NONE;
+}
+
 int mv_source_clear_c(
                mv_source_h source)
 {
@@ -258,6 +286,25 @@ int mv_source_get_buffer_c(
        }
 
        LOGD("Get media vision source [%p] buffer and buffer size to be returned", source);
+       *buffer = static_cast<unsigned char*>((static_cast<MediaVision::Common::MediaSource*>(source))->getBuffer());
+       *size = (static_cast<MediaVision::Common::MediaSource*>(source))->getBufferSize();
+       LOGD("Media vision source [%p] buffer (%p) and buffer size (%ui) has been returned", source, buffer, *size);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_source_get_tensor_buffer_c(
+               mv_source_h source,
+               void **buffer,
+               unsigned int *size)
+{
+       if (!source) {
+               LOGE("Impossible to get buffer for NULL mv_source_h handle");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       LOGD("Get media vision source [%p] buffer and buffer size to be returned", source);
+
        *buffer = (static_cast<MediaVision::Common::MediaSource*>(source))->getBuffer();
        *size = (static_cast<MediaVision::Common::MediaSource*>(source))->getBufferSize();
        LOGD("Media vision source [%p] buffer (%p) and buffer size (%ui) has been returned", source, buffer, *size);
@@ -265,6 +312,7 @@ int mv_source_get_buffer_c(
        return MEDIA_VISION_ERROR_NONE;
 }
 
+
 int mv_source_get_height_c(
                mv_source_h source,
                unsigned int *height)
@@ -297,6 +345,39 @@ int mv_source_get_width_c(
        return MEDIA_VISION_ERROR_NONE;
 }
 
+int mv_source_get_channel_c(
+               mv_source_h source,
+               unsigned int *channel)
+{
+       if (!source) {
+               LOGE("Impossible to get channel for NULL mv_source_h handle");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       LOGD("Get media vision source [%p] channel to be returned", source);
+       *channel = (static_cast<MediaVision::Common::MediaSource*>(source))->getChannel();
+       LOGD("Media vision source [%p] channel (%ui) has been returned", source, *channel);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_source_get_dimension_c(
+               mv_source_h source,
+               unsigned int *dims)
+{
+       if (!source) {
+               LOGE("Impossible to get dimension for NULL mv_source_h handle");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       LOGD("Get media vision source [%p] dims to be returned", source);
+       *dims = (static_cast<MediaVision::Common::MediaSource*>(source))->getDimension();
+       LOGD("Media vision source [%p] dims (%ui) has been returned", source, *dims);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
 int mv_source_get_colorspace_c(
                mv_source_h source,
                mv_colorspace_e *colorspace)
@@ -313,6 +394,23 @@ int mv_source_get_colorspace_c(
        return MEDIA_VISION_ERROR_NONE;
 }
 
+bool mv_source_is_tensor_c(mv_source_h source)
+{
+       // NOTE: a signature such as
+       // int mv_source_is_tensor_c(mv_source_h source, bool *is_tensor)
+       // would allow a NULL handle to be reported as an error:
+       /*
+       if (!source) {
+               LOGE("mv_source_h handle is NULL");
+               return false;
+       }
+       */
+
+       LOGD("Get media vision source [%p]", source);
+       bool ret = (static_cast<MediaVision::Common::MediaSource*>(source))->getIsTensor();
+       LOGD("Media vision source [%p] is %s", source, ret ? "tensor" : "not tensor");
+
+       return ret;
+}
+
 int mv_create_engine_config_c(
                mv_engine_config_h *engine_cfg)
 {
index 362bc9fbf3f2d164f3567076de4efa35de74f4f7..0b8522b23fbe5a7c47e67179731d4c05554cd56a 100644 (file)
@@ -28,7 +28,7 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXXFLAGS} -Wno-unused-parameter
 FILE(GLOB MV_INFERENCE_INCLUDE_LIST "${PROJECT_SOURCE_DIR}/include/*.h" "${PROJECT_SOURCE_DIR}/include/*.hpp")
 FILE(GLOB MV_INFERENCE_SOURCE_LIST  "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp")
 
-FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc)
+FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc imgcodecs)
 if(NOT OpenCV_FOUND)
        MESSAGE(SEND_ERROR "OpenCV NOT FOUND")
        RETURN()
index 4c126ce9b092ba0b52f92ea30bba3e4e35f713ca..7422a0abc8327241a2c7387a6bbde3bee5c6fcd7 100755 (executable)
@@ -62,13 +62,15 @@ typedef struct _FacialLandMarkDetectionResults {
 
 typedef struct _PoseEstimationResults {
     int number_of_pose_estimation;
-    std::vector<cv::Point> locations;
+    std::vector<cv::Point2f> locations;
+    int gesture;
 } PoseEstimationResults;  /**< structure PoseEstimationResults */
 
 typedef struct _HandDetectionResults {
     int number_of_hands;
     std::vector<float> confidences;
     std::vector<cv::Rect> locations;
+    void* outputTensorData;
 } HandDetectionResults;  /**< structure HandDetectionResults */
 
 namespace mediavision {
@@ -345,6 +347,8 @@ private:
        cv::Size mSourceSize;
        cv::Mat mInputBuffer;
 
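+       /** Heatmaps masked by the detected hand regions; intended as input to the second (gesture) model */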
+       cv::Mat mHeatMapMatrix;
+
        mv_engine_config_h engine_config;
 
        InferenceEngineCommon * mBackend;
index 3bdc559308e58db0b2d4e91de23d56c797f7ef67..20c7d7c83a01c79a5d2b125f155d56ff1ee34566 100755 (executable)
@@ -530,6 +530,8 @@ int mv_inference_pose_estimation_detect_open(
        mv_source_h source,
        mv_inference_h infer,
     mv_rectangle_s *roi,
+       float scale_width,
+       float scale_height,
        mv_inference_pose_estimation_detected_cb detected_cb,
        void *user_data);
 
index c7b5d2f280f035be9221e53a70b2266277222bd1..da88c53286245afc6f855fee8bde310929aa4a8b 100755 (executable)
@@ -17,7 +17,7 @@
 #include "mv_private.h"
 #include "Inference.h"
 #include "InferenceIni.h"
-
+#include <opencv2/imgcodecs.hpp>
 #include <map>
 
 #include <unistd.h>
@@ -686,6 +686,7 @@ int Inference::FillOutputResult(tensor_t &outputData)
                }
 
                outputData.data.push_back((void *)mOutputTensorBuffers[i].buffer);
+               LOGI("%p",  mOutputTensorBuffers[i].buffer);
        }
 
        return MEDIA_VISION_ERROR_NONE;
@@ -870,9 +871,9 @@ int Inference::Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle
        /* convert mv_source to cv::Mat */
        cv::Mat cvSource;
        cv::Rect cvRoi;
-       unsigned int width = 0, height = 0;
+       unsigned int width = 0, height = 0, channel = 0, dim = 0;
        unsigned int bufferSize = 0;
-       unsigned char *buffer = NULL;
+       void *buffer = NULL;
 
        if (mvSources.empty()) {
                LOGE("mvSources should contain only one cv source.");
@@ -891,47 +892,63 @@ int Inference::Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle
 
        mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
 
-       if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE ||
+       if (mv_source_is_tensor(mvSource)) {
+               if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE ||
+                       mv_source_get_height(mvSource, &height) != MEDIA_VISION_ERROR_NONE ||
+                       mv_source_get_channel(mvSource, &channel) != MEDIA_VISION_ERROR_NONE ||
+                       mv_source_get_dimension(mvSource, &dim) != MEDIA_VISION_ERROR_NONE ||
+                       mv_source_get_tensor_buffer(mvSource, &buffer, &bufferSize))
+                       return MEDIA_VISION_ERROR_INTERNAL;
+       } else {
+               if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE ||
                        mv_source_get_height(mvSource, &height) != MEDIA_VISION_ERROR_NONE ||
                        mv_source_get_colorspace(mvSource, &colorspace) != MEDIA_VISION_ERROR_NONE ||
-                       mv_source_get_buffer(mvSource, &buffer, &bufferSize))
-               return MEDIA_VISION_ERROR_INTERNAL;
+                       mv_source_get_buffer(mvSource, reinterpret_cast<unsigned char**>(&buffer), &bufferSize))
+                       return MEDIA_VISION_ERROR_INTERNAL;
 
-       // TODO. Let's support various color spaces.
+               // TODO. Let's support various color spaces.
 
-       if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
-               LOGE("Not Supported format!\n");
-               return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
-       }
+               if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
+                       LOGE("Not Supported format!\n");
+                       return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
+               }
 
-       if (roi == NULL) {
-               cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer).clone();
-       } else {
-               cvRoi.x = roi->point.x;
-               cvRoi.y = roi->point.y;
-               cvRoi.width = (roi->point.x + roi->width) >= width ? width - roi->point.x : roi->width;
-               cvRoi.height = (roi->point.y + roi->height) >= height ? height - roi->point.y : roi->height;
-               cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer)(cvRoi).clone();
-       }
+               if (roi == NULL) {
+                       cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer).clone();
+               } else {
+                       cvRoi.x = roi->point.x;
+                       cvRoi.y = roi->point.y;
+                       cvRoi.width = (roi->point.x + roi->width) >= width ? width - roi->point.x : roi->width;
+                       cvRoi.height = (roi->point.y + roi->height) >= height ? height - roi->point.y : roi->height;
+                       cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer)(cvRoi).clone();
+               }
 
-       LOGE("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height);
+               LOGE("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height);
 
-       if (mCh != 1 && mCh != 3) {
-               LOGE("Channel not supported.");
-               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               if (mCh != 1 && mCh != 3) {
+                       LOGE("Channel not supported.");
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+               }
        }
 
+
        std::vector<inference_engine_tensor_buffer>::iterator iter;
        for (iter = mInputTensorBuffers.begin(); iter != mInputTensorBuffers.end(); iter++) {
                inference_engine_tensor_buffer tensor_buffer = *iter;
 
-               int data_type = ConvertToCv(tensor_buffer.data_type);
+               if (mv_source_is_tensor(mvSource)) {
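+                       // Tensor sources are already preprocessed, so copy the
+                       // raw buffer straight into the input tensor and skip
+                       // color conversion and normalization.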
+                       memcpy(tensor_buffer.buffer, buffer, bufferSize);
+               } else {
 
-               // Convert color space of input tensor data and then normalize it.
-               ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, data_type, tensor_buffer.buffer), data_type);
-               if (ret != MEDIA_VISION_ERROR_NONE) {
-                       LOGE("Fail to preprocess input tensor data.");
-                       return ret;
+                       int data_type = ConvertToCv(tensor_buffer.data_type);
+
+                       // Convert color space of input tensor data and then normalize it.
+                       ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, data_type, tensor_buffer.buffer), data_type);
+                       if (ret != MEDIA_VISION_ERROR_NONE) {
+                               LOGE("Fail to preprocess input tensor data.");
+                               return ret;
+                       }
                }
        }
 
@@ -1262,6 +1279,7 @@ int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectio
        std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
        std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
 
+#if 0
        long number_of_pose = inferDimInfo[0][3];
        float * tmp = static_cast<float*>(inferResults[0]);
        cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]);
@@ -1297,6 +1315,51 @@ int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectio
 
        *detectionResults = results;
        LOGE("Inference: PoseEstimationResults: %d\n", results.number_of_pose_estimation);
+
+#else
+       for (int k = 0; k < inferDimInfo.size(); ++k) {
+               LOGI("output: %d", k);
+               LOGI("addr: %p", inferResults[k]);
+               for(int d = 0; d < inferDimInfo[k].size(); ++d) {
+                       LOGI("%d", inferDimInfo[k][d]);
+               }
+               LOGI("\n");
+       }
+
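+       // Model outputs: inferResults[0] holds the refined key-point
+       // coordinates (number_of_pose x 2, float), inferResults[1] holds the
+       // gesture class id (int64).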
+       //float* coordsPtr = reinterpret_cast<float*>(inferResults[0]);
+       int64_t * gesturePtr = reinterpret_cast<int64_t*>(inferResults[1]);
+       //LOGI("%lld", gesturePtr[0]);
+       cv::Mat gestureOut(cv::Size(inferDimInfo[1][0], inferDimInfo[1][1]), CV_8UC(sizeof(int64_t)), gesturePtr);
+       cv::Mat gestureReshaped = gestureOut.reshape(sizeof(int64_t), inferDimInfo[1][0]);
+       cv::Mat gestureArr[8];
+       cv::split(gestureReshaped, gestureArr);
+       int gesture = gestureArr[0].at<unsigned int>(0);
+
+       //float ratioX = (float)mSourceSize.width;;
+       //float ratioY = (float)mSourceSize.height;
+
+       int64_t * coordPtr = reinterpret_cast<int64_t*>(inferResults[0]);
+       long number_of_pose = inferDimInfo[0][1];
+       cv::Mat coordOut(cv::Size(inferDimInfo[0][0], inferDimInfo[0][1]), CV_32FC(inferDimInfo[0][2]), coordPtr);
+
+       PoseEstimationResults results;
+       results.number_of_pose_estimation = 0;
+       results.gesture = (int)gesture;
+
+       for (int poseIdx = 0; poseIdx < number_of_pose; poseIdx++) {
+               cv::Point2f loc;
+               cv::Vec2f coord = coordOut.at<cv::Vec2f>(0,poseIdx);
+               LOGI("PoseIdx[%2d]: x[%2f], y[%2f]", poseIdx, coord[0], coord[1]);
+
+               loc.x = coord[0];
+               loc.y = coord[1];
+               results.locations.push_back(loc);
+               results.number_of_pose_estimation++;
+       }
+
+       *detectionResults = results;
+
+#endif
        return MEDIA_VISION_ERROR_NONE;
 }
 
@@ -1311,88 +1374,186 @@ int Inference::GetHandDetectionResults(HandDetectionResults *detectionResults)
                return ret;
        }
 
-       // In case of object detection,
-       // a model may apply post-process but others may not.
-       // Thus, those cases should be hanlded separately.
+#if 1
        std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
-       LOGI("inferDimInfo size: %zu", outputData.dimInfo.size());
-
        std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
-       LOGI("inferResults size: %zu", inferResults.size());
 
-       float* boxes = nullptr;
-       float* classes = nullptr;
-       float* scores = nullptr;
-       int number_of_detections = 0;
+       for (int k = 0; k < inferDimInfo.size(); ++k) {
+               LOGI("output: %d", k);
+               LOGI("addr: %p", inferResults[k]);
+               for(int d = 0; d < inferDimInfo[k].size(); ++d) {
+                       LOGI("%d", inferDimInfo[k][d]);
+               }
+               LOGI("\n");
+       }
+       HandDetectionResults results;
+       results.number_of_hands = 0;
 
-       cv::Mat cvScores, cvClasses, cvBoxes;
-       if (outputData.dimInfo.size() == 1) {
-               // there is no way to know how many objects are detect unless the number of objects aren't
-               // provided. In the case, each backend should provide the number of results manually.
-               // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are
-               // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7
-               // indicats the image id. But it is useless if a batch mode isn't supported.
-               // So, use the 1st of 7.
+       // bbox
+       int64_t * bboxPtr = reinterpret_cast<int64_t*>(inferResults[0]);
+       float* heatmapPtr = reinterpret_cast<float*>(inferResults[1]);
 
-               number_of_detections = (int)(*reinterpret_cast<float*>(outputData.data[0]));
-               cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], CV_32F, outputData.data[0]);
+       double number_of_results = 0;
+       double maxValue = 0.0;
+       cv::Mat kernel = cv::Mat::ones(2,2, CV_8UC1);
 
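+       // Segment hand blobs from the bbox map: morphological opening, then a
+       // distance transform and connected components (a watershed-style
+       // foreground/background split) to count the hands and derive their
+       // bounding boxes.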
-               // boxes
-               cv::Mat cvLeft = cvOutputData.col(3).clone();
-               cv::Mat cvTop = cvOutputData.col(4).clone();
-               cv::Mat cvRight = cvOutputData.col(5).clone();
-               cv::Mat cvBottom = cvOutputData.col(6).clone();
+       //int maskSz[] = {inferDimInfo[1][2], inferDimInfo[1][1], inferDimInfo[1][3]};
+       //int heatMapSz[] = {inferDimInfo[1][2], inferDimInfo[1][1], inferDimInfo[1][3]};
+       //dj
+       cv::Mat bboxOut(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), CV_8UC(sizeof(int64_t)), bboxPtr);
+       cv::Mat bboxReshaped = bboxOut.reshape(sizeof(int64_t), inferDimInfo[1][2]);
+       cv::Mat bboxArr[8];
+       cv::split(bboxReshaped, bboxArr);
+       cv::Mat bbox = bboxArr[0];
 
-               cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
-               cv::hconcat(cvBoxElems, 4, cvBoxes);
+       bbox =  bbox*255;
 
-               // classes
-               cvClasses = cvOutputData.col(1).clone();
+       ////cv::imwrite("/tmp/dumpbbox.jpg",bbox);
+       cv::Mat bboxOpened, bboxClosed;
+       cv::Mat backGround, distTransformed, foreGroundF, foreGroundU;
+       cv::Mat unKnown, markers;
 
-               // scores
-               cvScores = cvOutputData.col(2).clone();
+       // opening
+       cv::morphologyEx(bbox, bboxOpened, cv::MORPH_OPEN, kernel);
+       // dilate
+       cv::dilate(bboxOpened, backGround, kernel);
 
-               boxes = cvBoxes.ptr<float>(0);
-               classes = cvClasses.ptr<float>(0);
-               scores = cvScores.ptr<float>(0);
+       // get euclidean distance by distance transform
+       cv::distanceTransform(bboxOpened, distTransformed, cv::DIST_L2, 5); // euclidean dist.
 
-       } else {
-               boxes = reinterpret_cast<float*>(inferResults[0]);
-               classes = reinterpret_cast<float*>(inferResults[1]);
-               scores = reinterpret_cast<float*>(inferResults[2]);
-               number_of_detections = (int)(*reinterpret_cast<float*>(inferResults[3]));
-       }
+       cv::minMaxLoc(distTransformed, NULL, &maxValue, NULL, NULL);
+       LOGI("max value of distTransformed: %f", maxValue);
+       cv::threshold(distTransformed, foreGroundF, 0.5*maxValue, 255, cv::THRESH_BINARY);
 
-       int left, top, right, bottom;
-       cv::Rect loc;
+       LOGI("type of foreGround: %d\n", foreGroundF.type()); //#define CV_32F  5
+       LOGI("type of backGround: %d\n", backGround.type()); //#define CV_8U   0
+       foreGroundF.convertTo(foreGroundU, CV_8U);
+       LOGI("type of backGround: %d\n", foreGroundU.type()); //#define CV_8U   0
+       cv::subtract(backGround, foreGroundU , unKnown);
+       LOGI("type of unKnown: %d\n", unKnown.type()); //
 
-       HandDetectionResults results;
-       results.number_of_hands = 0;
-       for (int idx = 0; idx < number_of_detections; ++idx) {
-               if (scores[idx] < mThreshold)
-                       continue;
 
-               left =   (int)(boxes[idx*4 + 1] * mSourceSize.width);
-               top  =   (int)(boxes[idx*4 + 0] * mSourceSize.height);
-               right  = (int)(boxes[idx*4 + 3] * mSourceSize.width);
-               bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height);
+       cv::connectedComponents(foreGroundU, markers);
 
-               loc.x = left;
-               loc.y = top;
-               loc.width = right -left + 1;
-               loc.height = bottom - top + 1;
+       markers += 1;
 
-               results.confidences.push_back(scores[idx]);
-               results.locations.push_back(loc);
-               results.number_of_hands++;
+       markers.setTo(0, markers==255);
 
-               LOGI("confidence:%f", scores[idx]);
-               LOGI("class: %f", classes[idx]);
-               LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx*4 + 1], boxes[idx*4 + 0], boxes[idx*4 + 3], boxes[idx*4 + 2]);
-               LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
+       cv::minMaxLoc(markers, NULL, &number_of_results, NULL, NULL);
+       //results.number_of_pose_estimation = static_cast<int>(number_of_results)-1;
+       LOGI("number_of_results: %d", static_cast<int>(number_of_results)-1);
+
+       //cv::Mat heatMap = cv::Mat(3, heatMapSz, CV_32FC1, heatmapPtr);
+       cv::Mat heatMap = cv::Mat(cv::Size(inferDimInfo[1][2],inferDimInfo[1][1]), CV_32FC(inferDimInfo[1][3]), heatmapPtr);
+       cv::Mat heatMapReshape = heatMap.reshape(inferDimInfo[1][3], inferDimInfo[1][2]);
+       cv::Mat heatMapReshapeArr[inferDimInfo[1][3]];
+       cv::Mat heatMapMatrixArr[inferDimInfo[1][3]];
+       cv::split(heatMapReshape, heatMapReshapeArr);
+
+
+       cv::Mat mask = cv::Mat::zeros(inferDimInfo[0][2], inferDimInfo[0][1], CV_8UC1);
+       cv::Mat maskImg = cv::Mat::zeros(inferDimInfo[0][2], inferDimInfo[0][1], CV_8UC1);
+       cv::Mat kernel2 = cv::Mat::ones(3,3, CV_8UC1);
+
+
+       float ratioX = (float)mSourceSize.width / (float)inferDimInfo[0][2];
+       float ratioY = (float)mSourceSize.height / (float)inferDimInfo[0][1];
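+       // For each connected component: compute a bounding box scaled back to
+       // source coordinates, then dilate the component mask and multiply it
+       // into every heatmap channel so that only responses inside the
+       // detected hand survive; the masked channels are merged into
+       // mHeatMapMatrix, which feeds the second (gesture) model.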
+       for (int resultIdx = 0; resultIdx <  (static_cast<int>(number_of_results)-1); resultIdx++) {
+               mask.setTo(1, markers==(resultIdx + 2));
+
+               cv::Mat maskIdx;
+               findNonZero(mask, maskIdx);
+               int numIdx = maskIdx.total();
+               LOGI("type of maskIdx: %d, and ch: %d, total: %d points\n", mask.type(), mask.channels(), numIdx );
+               int minX, minY;
+               int maxX, maxY;
+               int maxBBSize = 0;
+               int maxBBSizeIdx = -1;
+               minX = minY = 100;
+               maxX = maxY = -1;
+
+               for (int idx = 0; idx < numIdx; ++idx) {
+                       //LOGI("%d, %d", mask.at<cv::Point>(0).x, mask.at<cv::Point>(0).y);
+                       if (maskIdx.at<cv::Point>(idx).x < minX) {
+                               minX = maskIdx.at<cv::Point>(idx).x;
+                       }
+
+                       if (maskIdx.at<cv::Point>(idx).y < minY) {
+                               minY = maskIdx.at<cv::Point>(idx).y;
+                       }
+
+                       if (maskIdx.at<cv::Point>(idx).x >= maxX) {
+                               maxX = maskIdx.at<cv::Point>(idx).x;
+                       }
+
+                       if (maskIdx.at<cv::Point>(idx).y >= maxY) {
+                               maxY = maskIdx.at<cv::Point>(idx).y;
+                       }
+               }
+
+               int bbSize = (maxX - minX) * (maxY - minY);
+               if (bbSize > maxBBSize) {
+                       maxBBSize = bbSize;
+                       maxBBSizeIdx = resultIdx;
+
+                       minX = (int)((float)minX * ratioX);
+                       maxX = (int)((float)maxX * ratioX);
+                       minY = (int)((float)minY * ratioY);
+                       maxY = (int)((float)maxY * ratioY);
+                       results.locations.push_back(cv::Rect(minX, minY, (maxX - minX +1), (maxY-minY +1)));
+               }
+
+               LOGI("(%d,%d) - (%d,%d): size %d(idx:%d)", minX, minY, maxX, maxY, maxBBSize, maxBBSizeIdx);
+
+               cv::dilate(mask, maskImg, kernel2, cv::Point(-1,-1), 4);
+
+               /*
+               cv::Mat maskImg3d(3, maskSz, CV_32FC1);
+
+               //cv::Mat maskImg3d = maskImg.reshape(1, 3, sz);
+
+               for (int d1 = 0; d1 < inferDimInfo[1][2]; ++d1) {
+                       for (int d2 = 0; d2 < inferDimInfo[1][1]; ++d2) {
+                               for (int d3 = 0; d3 < inferDimInfo[0][3]; ++d3) {
+                                       maskImg3d.at<float>(d1, d2, d3) = maskImg.at<float>(d1, d2);
+                               }
+                       }
+               }
+               */
+
+               cv::Mat maskImgF;
+               maskImg.convertTo(maskImgF, CV_32FC1);
+               for (int ch = 0; ch < inferDimInfo[1][3]; ++ch ) {
+                       cv::multiply(heatMapReshapeArr[ch], maskImgF, heatMapMatrixArr[ch]);
+                       //char dumpName[1024];
+
+                       //snprintf(dumpName, 1024, "/tmp/heatmapDump_%d.csv", k);
+                       double maxVal;
+                       minMaxLoc(heatMapMatrixArr[ch], NULL, &maxVal, NULL, NULL);
+                       LOGI("%d: %f", ch, maxVal);
+                       /*
+                       snprintf(dumpName, 1024, "/tmp/heatmapDump_%d.csv", k);
+                       LOGI("%s", dumpName);
+                       std::ofstream dumpFile;
+                       dumpFile.open(dumpName);
+                       LOGI("tempArr shape: %dx%d", tempArr[k].size[0], tempArr[k].size[1]);
+                       dumpFile << cv::format(tempArr[k], cv::Formatter::FMT_CSV) << std::endl;
+                       dumpFile.close();
+                       */
+               }
+
+               cv::merge(heatMapMatrixArr, inferDimInfo[1][3], mHeatMapMatrix);
        }
 
+       LOGE("heatmapMatrix: type[%d], size[%d], elemSize[%d]", mHeatMapMatrix.type(),
+                                               mHeatMapMatrix.total(), mHeatMapMatrix.elemSize());
+
+       results.number_of_hands = static_cast<int>(number_of_results)-1;
+       results.outputTensorData = mHeatMapMatrix.data;
+       LOGE("mHeatMapMatrix: %p", results.outputTensorData);
        *detectionResults = results;
+#endif
+
        LOGE("Inference: GetHandDetectionResults: %d\n", results.number_of_hands);
        return MEDIA_VISION_ERROR_NONE;
 }
index c08339c821385fac108ea91777a1dd30c6b744f3..19fe9c8372a412c156f3ae2fd54807d96e8462cb 100755 (executable)
@@ -319,6 +319,8 @@ int mv_inference_pose_estimation_detect(
        mv_source_h source,
        mv_inference_h infer,
        mv_rectangle_s *roi,
+       float scale_width,
+       float scale_height,
        mv_inference_pose_estimation_detected_cb detected_cb,
        void *user_data)
 {
@@ -336,8 +338,8 @@ int mv_inference_pose_estimation_detect(
        ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data);
        */
 #else
-
-       ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data);
+       LOGE("%p", user_data);
+       ret = mv_inference_pose_estimation_detect_open(source, infer, roi, scale_width, scale_height, detected_cb, user_data);
 
        MEDIA_VISION_FUNCTION_LEAVE();
 
index 2de002a2aea2d22c71bd5648fd64fbc6905b9479..957bbd7c39316bbde9e830cec34d4449a6b91e6b 100755 (executable)
@@ -23,6 +23,9 @@
 #include <unistd.h>
 #include <string>
 
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+
 using namespace mediavision::inference;
 
 static int check_mv_inference_engine_version(mv_engine_config_h engine_config, bool *is_new_version)
@@ -811,6 +814,8 @@ int mv_inference_pose_estimation_detect_open(
        mv_source_h source,
        mv_inference_h infer,
        mv_rectangle_s *roi,
+       float scale_width,
+       float scale_height,
        mv_inference_pose_estimation_detected_cb detected_cb,
        void *user_data)
 {
@@ -843,12 +848,40 @@ int mv_inference_pose_estimation_detect_open(
 
        std::vector<mv_point_s> locations(numberOfPoseEstimation);
 
+       unsigned int tmpWidth = 0;
+       unsigned int tmpHeight = 0;
+       unsigned char *buffer = NULL;
+       unsigned int size = 0;
+
+       mv_source_h* tmpSource = (mv_source_h*)(user_data);
+       cv::Mat dumpMap;
+       if(user_data) {
+               mv_source_get_width(*tmpSource, &tmpWidth);
+               mv_source_get_height(*tmpSource, &tmpHeight);
+               LOGE("%d, %d", tmpWidth, tmpHeight);
+               mv_source_get_buffer(*tmpSource, &buffer, &size);
+               LOGE("%p", buffer);
+               dumpMap = cv::Mat(cv::Size(225,225), CV_8UC3, buffer); // NOTE: assumes the 225x225 test input
+       } else {
+               LOGE("user_data is NULL");
+       }
+
        for (int n = 0; n < numberOfPoseEstimation; ++n) {
 
-               locations[n].x = poseEstimationResults.locations[n].x;
-               locations[n].y = poseEstimationResults.locations[n].y;
-       }
+               locations[n].x = (int)(poseEstimationResults.locations[n].x * scale_width);
+               locations[n].y = (int)(poseEstimationResults.locations[n].y * scale_height);
 
+               cv::Point point((int)(poseEstimationResults.locations[n].x * scale_width),
+                                               (int)(poseEstimationResults.locations[n].y * scale_height));
+               //cv::drawMarker(dumpMap, point, cv::Scalar(0,255,0), cv::MARKER_DIAMOND, );
+               if(user_data) {
+                       cv::circle(dumpMap, point, 1, cv::Scalar(0,255,0), 2);
+               }
+       }
+       if (user_data) {
+               cv::cvtColor(dumpMap, dumpMap, cv::COLOR_RGB2BGR);
+               cv::imwrite("/tmp/dumpOut.jpg", dumpMap);
+       }
        detected_cb(source, numberOfPoseEstimation, locations.data(), user_data);
 
        return ret;
@@ -884,17 +917,32 @@ int mv_inference_hand_detect_open(
 
        numberOfOutputs = handDetectionResults.number_of_hands;
 
+       LOGW("numberOfOutputs: %d", numberOfOutputs);
        float *confidences = handDetectionResults.confidences.data();
+       LOGW("done");
        std::vector<mv_rectangle_s> locations(numberOfOutputs);
 
-       for (int n = 0; n < numberOfOutputs; ++n) {
-               locations[n].point.x = handDetectionResults.locations[n].x;
-               locations[n].point.y = handDetectionResults.locations[n].y;
-               locations[n].width = handDetectionResults.locations[n].width;
-               locations[n].height = handDetectionResults.locations[n].height;
+       LOGE("user_data:%p", user_data);
+       LOGE("outputTensorData:%p", handDetectionResults.outputTensorData);
+       if (user_data) {
+               locations.clear();
+               std::vector<mv_rectangle_s>().swap(locations);
+               if (handDetectionResults.outputTensorData) {
+                       LOGW("try to get outputTensorData: %zd", sizeof(float));
+                       memcpy(user_data, handDetectionResults.outputTensorData, sizeof(float)*(56*56*21));
+               } else {
+                       LOGW("outputTensorData is NULL");
+               }
+       } else {
+               for (int n = 0; n < numberOfOutputs; ++n) {
+                       locations[n].point.x = handDetectionResults.locations[n].x;
+                       locations[n].point.y = handDetectionResults.locations[n].y;
+                       locations[n].width = handDetectionResults.locations[n].width;
+                       locations[n].height = handDetectionResults.locations[n].height;
+               }
        }
 
-       detected_cb(source, numberOfOutputs, confidences, locations.data(), user_data);
+       detected_cb(source, numberOfOutputs, confidences, user_data == NULL ? locations.data() : NULL, user_data);
 
        return ret;
-}
\ No newline at end of file
+}
index 2fdf73f4716d4bf00ba15b2ccd01c4306b5fe0fc..9e15c79278ae3149ab2362270c87125cd95feb87 100644 (file)
@@ -25,11 +25,22 @@ BuildRequires: libavutil-devel
 BuildRequires: pkgconfig(gstreamer-1.0)
 BuildRequires: pkgconfig(gstreamer-base-1.0)
 BuildRequires: pkgconfig(gstreamer-app-1.0)
+BuildRequires: pkgconfig(gstreamer-video-1.0)
+BuildRequires: pkgconfig(cairo)
 BuildRequires: pkgconfig(libtzplatform-config)
 BuildRequires: pkgconfig(iniparser)
 BuildRequires: pkgconfig(ncurses)
 BuildRequires: pkgconfig(inference-engine-interface-common)
 
+BuildRequires:  pkgconfig(tizen-extension-client)
+BuildRequires:  pkgconfig(elementary)
+BuildRequires:  pkgconfig(ecore)
+BuildRequires:  pkgconfig(evas)
+BuildRequires:  pkgconfig(ecore-wl2)
+BuildRequires:  pkgconfig(ecore-evas)
+BuildRequires:  pkgconfig(appcore-efl)
+BuildRequires:  capi-ui-efl-util-devel
+
 %description
 Media Vision library for Tizen Native API. Includes barcode detecting, barcode generating, face and image modules.
 
@@ -279,6 +290,7 @@ install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj
 %TZ_SYS_BIN/mv_image*
 %TZ_SYS_BIN/mv_surveillance*
 %TZ_SYS_BIN/mv_infer*
+%TZ_SYS_BIN/mv_stream*
 
 %if 0%{?gcov:1}
 %files gcov
index c01536d0dae6f9274e26d0bd795da9e7c5fa81b3..80be87799d0efa81e78071521370bfebba8556d6 100644 (file)
@@ -80,6 +80,29 @@ int mv_source_fill_by_buffer(
        return ret;
 }
 
+int mv_source_fill_by_tensor_buffer(
+               mv_source_h source,
+               void *data_buffer,
+               mv_inference_data_type_e type,
+               unsigned int buffer_size,
+               unsigned int width,
+               unsigned int height,
+               unsigned int channel,
+               unsigned int dimension)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(data_buffer);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_fill_by_tensor_buffer_c(
+               source, data_buffer, type, buffer_size, width, height,
+               channel, dimension);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_source_clear(
                mv_source_h source)
 {
@@ -110,6 +133,23 @@ int mv_source_get_buffer(
        return ret;
 }
 
+int mv_source_get_tensor_buffer(
+               mv_source_h source,
+               void **data_buffer,
+               unsigned int *buffer_size)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(data_buffer);
+       MEDIA_VISION_NULL_ARG_CHECK(buffer_size);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_get_tensor_buffer_c(source, data_buffer, buffer_size);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_source_get_height(
                mv_source_h source,
                unsigned int *image_height)
@@ -140,6 +180,36 @@ int mv_source_get_width(
        return ret;
 }
 
+int mv_source_get_channel(
+               mv_source_h source,
+               unsigned int *channel)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(channel);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_get_channel_c(source, channel);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+int mv_source_get_dimension(
+               mv_source_h source,
+               unsigned int *dims)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_NULL_ARG_CHECK(dims);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       int ret = mv_source_get_dimension_c(source, dims);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_source_get_colorspace(
                mv_source_h source,
                mv_colorspace_e *image_colorspace)
@@ -155,6 +225,19 @@ int mv_source_get_colorspace(
        return ret;
 }
 
+
+bool mv_source_is_tensor(mv_source_h source)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+       bool ret = mv_source_is_tensor_c(source);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
 int mv_create_engine_config(
                mv_engine_config_h *engine_cfg)
 {
index 46fe8cb3307e07c271240b7163066d1684e70cec..efeb49cdca166130fa3c3ff3e0319b481d743938 100644 (file)
@@ -318,6 +318,8 @@ int mv_inference_pose_estimation_detect(
        mv_source_h source,
        mv_inference_h infer,
        mv_rectangle_s *roi,
+       float scale_width,
+       float scale_height,
        mv_inference_pose_estimation_detected_cb detected_cb,
        void *user_data)
 {
@@ -335,11 +337,41 @@ int mv_inference_pose_estimation_detect(
        ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data);
        */
 #else
+       LOGE("%p", user_data);
+       ret = mv_inference_pose_estimation_detect_open(source, infer, roi, scale_width, scale_height, detected_cb, user_data);
 
-       ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data);
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+#endif
+}
+
+int mv_inference_hand_detect(
+       mv_source_h source,
+       mv_inference_h infer,
+       mv_inference_hand_detected_cb detected_cb,
+       void *user_data)
+{
+       MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_INSTANCE_CHECK(infer);
+       MEDIA_VISION_NULL_ARG_CHECK(detected_cb);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT
+       /*
+       ret = mv_inference_hand_detect_lic(source, infer, detected_cb, user_data);
+       */
+#else
+
+       ret = mv_inference_hand_detect_open(source, infer, detected_cb, user_data);
 
        MEDIA_VISION_FUNCTION_LEAVE();
 
        return ret;
+
 #endif
 }
\ No newline at end of file
index 389e6118f7f46bfb2ba4934e5a9f0d53e6843d6d..29d6cfb3fd1c4b55aa11672f69852413980cea4b 100644 (file)
@@ -11,3 +11,4 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/face)
 add_subdirectory(${PROJECT_SOURCE_DIR}/image)
 add_subdirectory(${PROJECT_SOURCE_DIR}/surveillance)
 add_subdirectory(${PROJECT_SOURCE_DIR}/inference)
+add_subdirectory(${PROJECT_SOURCE_DIR}/stream_infer)
index 648c085f8a5670b7eeea84c57fcd5d40dc181a55..420ac9812d49ca3f0ee3b78e7b956fcb22b408be 100644 (file)
@@ -31,6 +31,7 @@
 #include <limits.h>
 #include <time.h>
 
+
 #define FILE_PATH_SIZE 1024
 
 //Image Classification
 //Pose Estimation
 #define PE_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite"
 
+#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite"
+#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite"
+//#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_tf113_int32.tflite"
+//#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1_tf113.tflite"
+
 /******
  * Public model:
  *  IC: mobilenet caffe, tf?
@@ -82,6 +88,8 @@
 #define NANO_PER_MILLI  ((__clock_t) 1000000)
 #define MILLI_PER_SEC  ((__clock_t) 1000)
 
+static void * outputTensorData = NULL;
+
 struct timespec diff(struct timespec start, struct timespec end)
 {
     struct timespec temp;
@@ -166,21 +174,149 @@ void _pose_estimation_detected_cb (
     }
 }
 
+
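+// Builds the engine config for the second-stage model: it takes the 56x56x21
+// filtered heatmaps from the first model as a raw tensor input and outputs
+// refined coordinates ("mobilenetv2/coord_refine") and a gesture class
+// ("mobilenetv2/gesture").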
+int perform_tflite_hand_detection2(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine cofniguration.\n");
+            }
+        }
+        return err;
+    }
+
+    char *inputNodeName = "input";
+    char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"};
+
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AIC_2_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(handle,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        56);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        56);
+
+    mv_engine_config_set_int_attribute(handle,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        21);
+
+    mv_engine_config_set_string_attribute(handle,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(handle,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+void _hand_pose_cb (
+        mv_source_h source,
+        const int number_of_pose_estimation,
+        const mv_point_s *locations,
+        void *user_data)
+{
+    printf("In callback, %d pose estimation\n", number_of_pose_estimation);
+    if (!user_data) {
+        for (int n = 0; n < number_of_pose_estimation; n++) {
+            printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y);
+        }
+    } else {
+        printf("%p\n", user_data);
+    }
+}
+
 void _hand_detected_cb (
         mv_source_h source,
         const int number_of_hands,
         const float *confidences,
         const mv_rectangle_s *locations,
-        void *user_data)
+        void *user_data) // user_data may be an mv_source?
 {
     printf("In callback: %d hands\n", number_of_hands);
 
-    for (int n = 0; n < number_of_hands; n++) {
-        printf("%.3f\n", confidences[n]);
-        printf("%d,%d,%d,%d\n", locations[n].point.x,
-                                locations[n].point.y,
-                                locations[n].width,
-                                locations[n].height);
+    if (!user_data) {
+        for (int n = 0; n < number_of_hands; n++) {
+            printf("%.3f\n", confidences[n]);
+            printf("%d,%d,%d,%d\n", locations[n].point.x,
+                                    locations[n].point.y,
+                                    locations[n].width,
+                                    locations[n].height);
+        }
+    } else {
+        printf("%p\n", user_data);
+
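+        // user_data carries the 56x56x21 float heatmap tensor produced by the
+        // first model; wrap it in a new mv_source and run the second stage.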
+        mv_source_h source2;
+        mv_create_source(&source2);
+        mv_source_fill_by_tensor_buffer(source2, user_data, MV_INFERENCE_DATA_FLOAT32,
+                            56 * 56 * 21* sizeof(float),
+                            56, 56, 21, 3);
+
+        mv_engine_config_h engine_cfg2 = NULL;
+
+        // perform_tflite_hand_detection2() creates the config handle itself,
+        // so creating another one here beforehand would leak it.
+        perform_tflite_hand_detection2(&engine_cfg2);
+
+        mv_inference_h infer2;
+        int err = mv_inference_create(&infer2);
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("Fail to create inference handle [err:%i]\n", err);
+        }
+        printf("infer2 created\n");
+
+        //configure
+        err = mv_inference_configure(infer2, engine_cfg2);
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("Fail to configure inference handle [err:%i]\n", err);
+        }
+        printf("engine_cfg2 configured\n");
+
+        //prepare
+        err = mv_inference_prepare(infer2);
+        printf("infer2 prepared\n");
+
+        err = mv_inference_pose_estimation_detect(source2, infer2, NULL, 225.f, 225.f, _hand_pose_cb, &source);
+        printf("pose estimated\n");
+
+        mv_destroy_source(source2);
+        printf("destroy source2");
+
+        mv_inference_destroy(infer2);
+        mv_destroy_engine_config(engine_cfg2);
     }
 
 }
@@ -2552,7 +2688,7 @@ int perform_pose_estimation_detection()
             clock_gettime(CLOCK_MONOTONIC, &s_tspec);
 
             // Pose estimation
-            err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _pose_estimation_detected_cb, NULL);
+            err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, 1.f, 1.f, _pose_estimation_detected_cb, NULL);
 
             clock_gettime(CLOCK_MONOTONIC, &e_tspec);
 
@@ -2636,9 +2772,10 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg)
     char *inputNodeName = "input";
     char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"};
 
+    outputTensorData = (void*)calloc(56*56*21, sizeof(float));
     mv_engine_config_set_string_attribute(handle,
                         MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
-                        HD_TFLITE_WEIGHT_PATH);
+                        PE_TFLITE_AIC_1_WEIGHT_PATH);
 
        mv_engine_config_set_int_attribute(handle,
                         MV_INFERENCE_INPUT_DATA_TYPE,
@@ -2685,84 +2822,14 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg)
     return err;
 }
 
-int perform_armnn_hand_detection(mv_engine_config_h *engine_cfg)
-{
-    int err = MEDIA_VISION_ERROR_NONE;
-
-    mv_engine_config_h handle = NULL;
-    err = mv_create_engine_config(&handle);
-    if (err != MEDIA_VISION_ERROR_NONE) {
-        printf("Fail to create engine configuration handle.\n");
-        if (handle) {
-            int err2 = mv_destroy_engine_config(handle);
-            if (err2 != MEDIA_VISION_ERROR_NONE) {
-                printf("Fail to destroy engine cofniguration.\n");
-            }
-        }
-        return err;
-    }
-
-    char *inputNodeName = "input";
-    char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"};
-
-    mv_engine_config_set_string_attribute(handle,
-                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
-                        HD_TFLITE_WEIGHT_PATH);
-
-       mv_engine_config_set_int_attribute(handle,
-                        MV_INFERENCE_INPUT_DATA_TYPE,
-                        MV_INFERENCE_DATA_FLOAT32);
-
-    mv_engine_config_set_double_attribute(handle,
-                        MV_INFERENCE_MODEL_MEAN_VALUE,
-                        0.0);
-
-    mv_engine_config_set_double_attribute(handle,
-                        MV_INFERENCE_MODEL_STD_VALUE,
-                        1.0);
-
-    mv_engine_config_set_int_attribute(handle,
-                        MV_INFERENCE_BACKEND_TYPE,
-                        MV_INFERENCE_BACKEND_ARMNN);
-
-    mv_engine_config_set_int_attribute(handle,
-                        MV_INFERENCE_TARGET_DEVICE_TYPE,
-                        MV_INFERENCE_TARGET_DEVICE_CPU);
-
-    mv_engine_config_set_int_attribute(handle,
-                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
-                        224);
-
-    mv_engine_config_set_int_attribute(handle,
-                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
-                        224);
-
-    mv_engine_config_set_int_attribute(handle,
-                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
-                        3);
-
-    mv_engine_config_set_string_attribute(handle,
-                        MV_INFERENCE_INPUT_NODE_NAME,
-                        inputNodeName);
-
-    mv_engine_config_set_array_string_attribute(handle,
-                        MV_INFERENCE_OUTPUT_NODE_NAMES,
-                        outputNodeNames,
-                        2);
-
-    *engine_cfg = handle;
-    return err;
-}
-
 int perform_hand_detection()
 {
     int err = MEDIA_VISION_ERROR_NONE;
 
     int sel_opt = 0;
-    const int options[6] = {1, 2, 3, 4, 5, 6};
-    const *names[6] = { "Configuration",
+    const int options[5] = {1, 2, 3, 4, 5};
+    const char *names[5] = { "Configuration",
                                                "TFLITE(CPU) + HandDetection",
-                        "ARMNN(CPU) + HandDetection",
                         "Prepare",
                         "Run",
                         "Back"};
@@ -2772,7 +2839,7 @@ int perform_hand_detection()
     mv_source_h mvSource = NULL;
 
     while(sel_opt == 0) {
-        sel_opt = show_menu("Select Action:", options, names, 6);
+        sel_opt = show_menu("Select Action:", options, names, 5);
         switch (sel_opt) {
         case 1:
         {
@@ -2798,17 +2865,6 @@ int perform_hand_detection()
         }
             break;
         case 3:
-        {
-            //perform pose estimation config
-            if (engine_cfg) {
-                int err2 = mv_destroy_engine_config(engine_cfg);
-                if (err2 != MEDIA_VISION_ERROR_NONE)
-                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
-            }
-            err = perform_armnn_hand_detection(&engine_cfg);
-        }
-            break;
-        case 4:
         {
             // create - configure - prepare
             if (infer) {
@@ -2841,7 +2897,7 @@ int perform_hand_detection()
             }
         }
             break;
-        case 5:
+        case 4:
         {
             if (mvSource) {
                 int err2 = mv_destroy_source(mvSource);
@@ -2878,7 +2934,11 @@ int perform_hand_detection()
             clock_gettime(CLOCK_MONOTONIC, &s_tspec);
 
             // Hand detection
-                       err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL);
+                       //err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL);
+
+            printf("mem: %p\n", outputTensorData);
+            //err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _hand_pose_cb, outputTensorData);
+            err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, outputTensorData);
 
             clock_gettime(CLOCK_MONOTONIC, &e_tspec);
 
@@ -2888,7 +2948,7 @@ int perform_hand_detection()
 
             break;
         }
-        case 6:
+        case 5:
         {
             //perform destroy
             if (engine_cfg) {
@@ -2904,6 +2964,11 @@ int perform_hand_detection()
                     printf("Fail to destroy inference handle [err:%i]\n", err);
                 }
             }
+
+            if (outputTensorData) {
+                free(outputTensorData);
+                outputTensorData = NULL;
+            }
         }
             break;
         default:
@@ -2939,6 +3004,12 @@ int perform_hand_detection()
         sel_opt = (do_another == 1) ? 0 : 1;
     }
 
+    if (outputTensorData) {
+        free(outputTensorData);
+        outputTensorData = NULL;
+    }
+    printf("outputTensorData: %p\n",outputTensorData);
+
     return MEDIA_VISION_ERROR_NONE;
 }
 
@@ -2957,7 +3028,7 @@ int main()
 
     int err = MEDIA_VISION_ERROR_NONE;
     while (sel_opt == 0) {
-        sel_opt = show_menu("Select Action:", options, names, 6);
+        sel_opt = show_menu("Select Action:", options, names, 7);
         switch (sel_opt) {
         case 1:
         {
diff --git a/test/testsuites/stream_infer/CMakeLists.txt b/test/testsuites/stream_infer/CMakeLists.txt
new file mode 100644 (file)
index 0000000..dc0ab1c
--- /dev/null
@@ -0,0 +1,63 @@
+project(mv_stream_infer)
+cmake_minimum_required(VERSION 2.6)
+
+set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG    _DEBUG)
+
+if(NOT SKIP_WARNINGS)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")
+endif()
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+include_directories(${PROJECT_SOURCE_DIR})
+include_directories(${MV_CAPI_MEDIA_VISION_INC_DIR})
+include_directories(${INC_IMAGE_HELPER})
+include_directories(${INC_VIDEO_HELPER})
+include_directories(${INC_TS_COMMON})
+
+file(GLOB MV_STREAMINFER_TEST_SUITE_INC_LIST "${PROJECT_SOURCE_DIR}/*.h")
+file(GLOB MV_STREAMINFER_TEST_SUITE_SRC_LIST "${PROJECT_SOURCE_DIR}/*.c")
+
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(GLIB_PKG glib-2.0)
+
+if (NOT GLIB_PKG_FOUND)
+    message(SEND_ERROR "Failed to find glib")
+    return()
+else()
+    include_directories(${GLIB_PKG_INCLUDE_DIRS})
+endif()
+
+
+SET(dependents "gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0 cairo elementary ecore-wl2 appcore-efl capi-ui-efl-util")
+
+INCLUDE(FindPkgConfig)
+pkg_check_modules(${PROJECT_NAME} REQUIRED ${dependents})
+FOREACH(flag ${${PROJECT_NAME}_CFLAGS})
+  SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}")
+ENDFOREACH(flag)
+
+SET(CMAKE_C_FLAGS "-I./include -I./include/headers ${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} -fPIC -Wall -DEFL_BETA_API_SUPPORT=1")
+SET(CMAKE_C_FLAGS_DEBUG "-O0 -g")
+
+add_executable(${PROJECT_NAME}
+               ${MV_STREAMINFER_TEST_SUITE_INC_LIST}
+               ${MV_STREAMINFER_TEST_SUITE_SRC_LIST}
+               ${MV_CAPI_MEDIA_VISION_INC_LIST})
+
+target_link_libraries(${PROJECT_NAME} ${MV_INFERENCE_LIB_NAME}
+                                      gstreamer-1.0
+                                      glib-2.0
+                                      capi-system-info
+                                      dlog
+                                      mv_image_helper
+                                      mv_video_helper
+                                      mv_testsuite_common
+                                      cairo
+                                      m
+                                      ${${PROJECT_NAME}_LIBRARIES}
+                                      )
+
+install(TARGETS ${PROJECT_NAME} DESTINATION ${testbin_dir})
diff --git a/test/testsuites/stream_infer/stream_infer.c b/test/testsuites/stream_infer/stream_infer.c
new file mode 100644 (file)
index 0000000..dbd8b68
--- /dev/null
@@ -0,0 +1,1772 @@
+/**
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _USE_MATH_DEFINES
+#if 0
+#include <evemu.h>
+#endif
+#include <fcntl.h>
+#include <mv_common.h>
+#include <mv_inference.h>
+
+#include <mv_testsuite_common.h>
+
+#include <image_helper.h>
+#include <mv_video_helper.h>
+
+#include <mv_log_cfg.h>
+
+#include <math.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+#define MAX(a, b) \
+({ __typeof__ (a) _a = (a); \
+__typeof__ (b) _b = (b); \
+_a > _b ? _a : _b; })
+
+#define MIN(a,b) \
+({ __typeof__ (a) _a = (a); \
+__typeof__ (b) _b = (b); \
+_a < _b ? _a : _b; })
+
+#include <glib-2.0/glib.h>
+#include <gst/gst.h>
+#include <gst/video/video.h>
+#include <cairo.h>
+#include <cairo-gobject.h>
+
+#include <Elementary.h>
+#include <appcore-efl.h>
+#include <Ecore.h>
+#include <Ecore_Evas.h>
+#include <Ecore_Wl2.h>
+#include <tizen-extension-client-protocol.h>
+#include <efl_util.h>
+
+#include <gst/gst.h>
+#include <gst/video/videooverlay.h>
+#include <unistd.h>
+#include <time.h>
+
+#define WIDTH  (480)
+#define HEIGHT (270)
+
+#ifdef PACKAGE
+#undef PACKAGE
+#endif
+#define PACKAGE "test"
+
+static int st = 0;
+static Evas_Object *g_eo = NULL;
+static Evas_Object *icon = NULL;
+
+/* for video display */
+static Evas_Object *g_win_id;
+static Evas_Object *selected_win_id;
+
+enum {
+	MODEL_TYPE_POSE_CPM = 0,
+	MODEL_TYPE_POSE_HOURGLASS,
+	MODEL_TYPE_POSE_HAND_AIC,
+	MODEL_TYPE_POSE_HAND_AICLite
+};
+
+typedef struct {
+       gchar *filename;
+       gchar *filename2;
+       int numbuffers;
+       int modelType;
+       Evas_Object *win;
+       Evas_Object *layout_main;       /* layout widget based on EDJ */
+       /* add more variables here */
+
+} appdata;
+
+static mv_rectangle_s poseRoi;
+
+static appdata ad;
+static GstBus *bus;
+static guint bus_watch_id;
+
+#define FILE_PATH_SIZE 1024
+
+// pose estimation
+#define PE_TFLITE_CPM_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model_cpm.tflite"
+#define PE_TFLITE_HOURGLASS_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model_hourglass.tflite"
+
+#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite"
+#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite"
+#define PE_TFLITE_AICLite_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_lite_224.tflite"
+#define PE_TFLITE_AICLite_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_lite_224.tflite"
+
+static float thValNeck = 0.3f; // 30%
+static float thValArm = 0.1f; // 10%
+static float thValLeg = 0.2f; // 20%
+
+typedef struct _rect {
+  int left;
+  int top;
+  int right;
+  int bottom;
+  int type;
+  bool updated;
+  bool cr_updated;
+} rect;
+
+
+typedef struct _humanSkeleton {
+  mv_point_s pose[21/*14*/];
+  mv_point_s prevPose[21/*14*/];
+  mv_rectangle_s loc;
+  mv_rectangle_s prevLoc;
+  mv_rectangle_s handRoi;
+  float scores[21/*14*/];
+  bool isPrevPose;
+  bool updated;    // detection is fine and pose is also good; then update drawing
+  bool locUpdated; // track previous detection
+  bool IsDetected; // detection
+} HumanSkeleton;
+
+typedef struct
+{
+  gboolean valid;
+  GstVideoInfo vinfo;
+} CairoOverlayState;
+
+typedef struct
+{
+  GstBuffer *buffer;
+  gpointer user_data;
+} HandOffData;
+
+//gchar *gesturenames;
+
+static HandOffData hdata_p;
+
+static GMutex pose_mutex;
+static guint old_timeout = 0;
+static guint nFrames = 0;
+
+// Gstreamer
+GstElement *pipeline, *source, *filter, *toverlay, *sink, *sink2;
+GstElement *tee, *vscale, *vsfilter, *vconv, *vcfilter;
+GstElement *vrate, *vrfilter, *fsink, *vrsink;
+GstElement *queue1, *queue2, *queue3;
+GstElement *oconv, *coverlay;
+GstElement *vcrop, *vcrscale, *vcrsfilter, *vcrsconv, *vcrscfilter, *vcrssink;
+CairoOverlayState *overlay_state;
+
+GstElement *decodebin, *dscale, *dconv;
+GstElement *dsfilter, *dcfilter;
+
+GstElement *tee2, *enc, *muxmp4, *fsink2, *queue4, *queue5, *encconv;
+
+//static HandSkeleton handSkeleton;
+static HumanSkeleton humanSkeleton;
+gulong handler_p;
+GList *line_list = NULL;
+
+mv_source_h mv_src_p;
+mv_source_h mv_src_p2;
+
+// Human pose
+mv_engine_config_h hp_mv_engine_cfg;
+mv_inference_h hp_mv_infer;
+
+mv_engine_config_h hp_mv_engine_cfg2;
+mv_inference_h hp_mv_infer2;
+
+static void * outputTensorData;
+
+FILE *fp;
+
+static bool IsGestureMode;
+
+static int poseCropSize = 0;
+
+#define IMAGE_SIZE_WIDTH 640
+#define IMAGE_SIZE_HEIGHT 480
+
+#define NANO_PER_SEC ((__clock_t) 1000000000)
+#define NANO_PER_MILLI  ((__clock_t) 1000000)
+#define MILLI_PER_SEC  ((__clock_t) 1000)
+
+struct timespec diff(struct timespec start, struct timespec end)
+{
+    struct timespec temp;
+    if ((end.tv_nsec - start.tv_nsec) < 0) {
+        temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+        temp.tv_nsec = NANO_PER_SEC + end.tv_nsec - start.tv_nsec;
+    }
+    else {
+        temp.tv_sec = end.tv_sec - start.tv_sec;
+        temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+    }
+    return temp;
+}
+
+unsigned long gettotalmillisec(const struct timespec time)
+{
+    return time.tv_sec * MILLI_PER_SEC + time.tv_nsec / NANO_PER_MILLI;
+}
+
+
+void int_handler(int sig)
+{
+       char c;
+
+       signal(sig, SIG_IGN);
+       while ((getchar()) != '\n');
+
+       printf(TEXT_YELLOW "Do you want to quit? [y/n]\n" TEXT_RESET);
+       c = getchar();
+	if (c == 'y' || c == 'Y') {
+
+               g_signal_handler_disconnect(vrsink, handler_p);
+#if 0
+               g_signal_handler_disconnect(vcrssink, handler_gp);
+#endif
+
+               gst_element_send_event(pipeline, gst_event_new_eos());
+
+               sleep(4);
+
+
+               if (mv_src_p)
+                       mv_destroy_source(mv_src_p);
+
+               if (hp_mv_infer)
+                       mv_inference_destroy(hp_mv_infer);
+
+               if (hp_mv_engine_cfg)
+                       mv_destroy_engine_config(hp_mv_engine_cfg);
+
+               if (mv_src_p2)
+                       mv_destroy_source(mv_src_p2);
+
+               if (hp_mv_infer2)
+                       mv_inference_destroy(hp_mv_infer2);
+
+		if (hp_mv_engine_cfg2)
+			mv_destroy_engine_config(hp_mv_engine_cfg2);
+
+               if (outputTensorData) {
+                       free(outputTensorData);
+                       outputTensorData = NULL;
+               }
+               printf(TEXT_YELLOW "exit..\n" TEXT_RESET);
+               signal(SIGINT, SIG_DFL);
+               exit(0);
+       } else {
+		printf("no\n");
+               signal(SIGINT, int_handler);
+       }
+
+       getchar(); // Get new line character
+}
+
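+/* Second-stage callback: stores the 21 refined hand landmarks in the global
+ * skeleton so the cairo overlay can draw them on the next frame. */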
+void _hand_pose_cb (
+        mv_source_h source,
+        const int number_of_pose_estimation,
+        const mv_point_s *locations,
+        void *user_data)
+{
+       printf("%d landmarks\n", number_of_pose_estimation);
+       for (int n = 0; n < number_of_pose_estimation; ++n) {
+
+               humanSkeleton.pose[n].x = (int)((float)locations[n].x);
+               humanSkeleton.pose[n].y = (int)((float)locations[n].y);
+               humanSkeleton.scores[n] = 1.0f; /* confidences[n];*/
+
+               //printf("(%d,%d): %f\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y, confidences[n]);
+               //printf("(%d,%d)\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y);
+       }
+       humanSkeleton.IsDetected = true;
+
+}
+
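+/* First-stage callback: if a hand was found, the heatmap tensor passed via
+ * user_data is wrapped into mv_src_p2 and pushed through the second model. */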
+static void _hand_detected_cb (
+        mv_source_h source,
+        const int number_of_hands,
+        const float *confidences,
+        const mv_rectangle_s *locations,
+        void *user_data) // user_data may be an mv_source?
+{
+
+#if 0
+       if (0 /*confidences[1] < thValNeck*/) {
+               printf("lost pose\n");
+               humanSkeleton.IsDetected = false;
+               humanSkeleton.isPrevPose = false;
+               return;
+       }
+       
+       printf("%d landmarks, %d crop\n", number_of_landmarks, poseCropSize);
+       for (int n = 0; n < number_of_landmarks; ++n) {
+
+               humanSkeleton.pose[n].x = (int)((float)(locations[n].x + poseRoi.point.x) / (float)poseCropSize * 640.f);
+               humanSkeleton.pose[n].y = (int)((float)(locations[n].y + poseRoi.point.y) / (float)poseCropSize * 480.f);
+               humanSkeleton.scores[n] = 1.0f; /* confidences[n];*/
+
+               //printf("(%d,%d): %f\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y, confidences[n]);
+               printf("(%d,%d)\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y);
+       }
+       humanSkeleton.IsDetected = true;
+#else
+
+       if (number_of_hands <= 0) {
+               humanSkeleton.IsDetected = false;
+               return;
+       }
+
+
+       struct timespec s_tspec;
+       struct timespec e_tspec;
+
+       clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+       mv_source_clear(mv_src_p2);
+       mv_source_fill_by_tensor_buffer(mv_src_p2, user_data,
+                                       MV_INFERENCE_DATA_FLOAT32,
+                                       56 * 56 * 21 * sizeof(float),
+                                       56, 56, 21, 3);
+
+       clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+       struct timespec diffspec = diff(s_tspec, e_tspec);
+       unsigned long timeDiff = gettotalmillisec(diffspec);
+       printf("memcpy time: %lu(ms)\n", timeDiff);
+
+       clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+       mv_inference_pose_estimation_detect(mv_src_p2, hp_mv_infer2, NULL, 640.f, 480.f, _hand_pose_cb, NULL);
+
+       clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+       diffspec = diff(s_tspec, e_tspec);
+       timeDiff = gettotalmillisec(diffspec);
+       printf("pose_estimation time: %lu(ms)\n", timeDiff);
+       //humanSkeleton.IsDetected = true;
+#endif
+       return;
+}
+
+
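+/* Idle callback scheduled from the GStreamer handoff: wraps the 224x224
+ * RGB888 frame into mv_src_p and kicks off the two-stage inference.
+ * Returning FALSE removes the idle source after a single run. */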
+static gboolean
+run_pose (void *user_data)
+{
+       HandOffData *udata = (HandOffData *)user_data;
+       if (!GST_IS_BUFFER(udata->buffer))
+               return FALSE;
+
+       GstMapInfo map;
+
+       /*
+       gst_buffer_map(udata->buffer, &map, GST_MAP_READ);
+
+       mv_source_clear(mv_src_p);
+
+       mv_source_fill_by_buffer(mv_src_p, map.data, 192*192*3, 192, 192, MEDIA_VISION_COLORSPACE_RGB888);
+
+       gst_buffer_unmap(udata->buffer, &map);
+
+       clock_t start = clock();
+       mv_inference_pose_estimation_detect(mv_src_p, hp_mv_infer, &poseRoi, 1.f, 1.f,  _human_pose_cb, NULL);
+       clock_t end = clock();
+       */
+
+       gst_buffer_map(udata->buffer, &map, GST_MAP_READ);
+
+       mv_source_clear(mv_src_p);
+
+       mv_source_fill_by_buffer(mv_src_p, map.data, 224*224*3, 224, 224, MEDIA_VISION_COLORSPACE_RGB888);
+
+       gst_buffer_unmap(udata->buffer, &map);
+
+
+       struct timespec s_tspec;
+       struct timespec e_tspec;
+
+       void * outputTensorBuffer = (void*)udata->user_data;
+
+       clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+       // invoke tflite -> _hand_detected_cb -> memcpy output -> invoke tflite -> _pose_cb
+       mv_inference_hand_detect(mv_src_p, hp_mv_infer, _hand_detected_cb, outputTensorBuffer);
+
+       clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+       struct timespec diffspec = diff(s_tspec, e_tspec);
+       unsigned long timeDiff = gettotalmillisec(diffspec);
+       printf("detect + pose time: %lu(ms)\n", timeDiff);
+
+       return FALSE;
+
+}
+
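+/* "handoff" signal handler: called on the streaming thread, so the actual
+ * inference is deferred to the main loop via g_idle_add(). */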
+static void
+_pose_est_handoff(GstElement *object, GstBuffer *buffer, GstPad *pad, gpointer user_data)
+{
+
+       nFrames++;
+       hdata_p.buffer = buffer;
+       hdata_p.user_data = user_data;
+
+#if 0
+       if (nFrames % 15 == 0) {
+               g_mutex_lock(&pose_mutex);
+               g_idle_add (run_pose, &hdata_p);
+               g_mutex_unlock(&pose_mutex);
+       }
+#else
+       g_mutex_lock(&pose_mutex);
+       g_idle_add (run_pose, &hdata_p);
+       g_mutex_unlock(&pose_mutex);
+#endif
+}
+
+static void
+prepare_overlay (GstElement * overlay, GstCaps * caps, gpointer user_data)
+{
+       CairoOverlayState *state = (CairoOverlayState *) user_data;
+
+       state->valid = gst_video_info_from_caps (&state->vinfo, caps);
+}
+
+/*
+static void
+draw_overlay (GstElement * overlay, cairo_t * cr, guint64 timestamp,
+    guint64 duration, gpointer user_data)
+{
+       CairoOverlayState *s = (CairoOverlayState *) user_data;
+
+       if (!s->valid) {
+               printf("not ready draw_overlay");
+               return;
+       }
+
+       cairo_set_source_rgba(cr, 0.1, 0.9, 0.0, 0.7);
+       cairo_set_line_width(cr, 2.0);
+
+
+       if (!humanSkeleton.IsDetected)
+               return;
+
+
+       //
+       if (humanSkeleton.isPrevPose == false) {
+               humanSkeleton.prevPose[1] = humanSkeleton.pose[1];
+               // head - neck
+               if (humanSkeleton.scores[0] >= thValNeck ) {
+                       humanSkeleton.prevPose[0] = humanSkeleton.pose[0];
+               }
+
+               // right arm
+               if (humanSkeleton.scores[2] >= thValArm) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[2] = humanSkeleton.pose[2];
+                       if (humanSkeleton.scores[3] >= thValArm) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[3] = humanSkeleton.pose[3];
+                               if (humanSkeleton.scores[4] >= thValArm) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[4] = humanSkeleton.pose[4];
+                               }
+                       }
+               }
+
+               // left arm
+               if (humanSkeleton.scores[5] >= thValArm) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[5] = humanSkeleton.pose[5];
+                       if (humanSkeleton.scores[6] >= thValArm) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[6] = humanSkeleton.pose[6];
+                               if (humanSkeleton.scores[7] >= thValArm) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[7] = humanSkeleton.pose[7];
+                               }
+                       }
+               }
+
+               // right leg
+               if (humanSkeleton.scores[8] >= thValLeg) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[8] = humanSkeleton.pose[8];
+                       if (humanSkeleton.scores[9] >= thValLeg) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[9] = humanSkeleton.pose[9];
+                               if (humanSkeleton.scores[10] >= thValLeg) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[10] = humanSkeleton.pose[10];
+                               }
+                       }
+               }
+
+               // left leg
+               if (humanSkeleton.scores[11] >= thValLeg) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[11] = humanSkeleton.pose[11];
+                       if (humanSkeleton.scores[12] >= thValLeg) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[12] = humanSkeleton.pose[12];
+                               if (humanSkeleton.scores[13] >= thValLeg) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[13] = humanSkeleton.pose[13];
+                               }
+                       }
+               }
+               humanSkeleton.isPrevPose = true;
+       } else {
+               // weighted sum of pose and prevPose
+               // method1: fixed weights (pose : prevPose = 0.7 : 0.3)
+               float poseWeight = 0.7f;
+               float prevPoseWeight = 0.3f;
+               humanSkeleton.prevPose[1].x = (poseWeight * humanSkeleton.pose[1].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[1].x);
+               humanSkeleton.prevPose[1].y = (poseWeight * humanSkeleton.pose[1].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[1].y);
+               // head - neck
+               if (humanSkeleton.scores[0] >= thValNeck ) {
+                       humanSkeleton.prevPose[0].x = (poseWeight * humanSkeleton.pose[0].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[0].x);
+                       humanSkeleton.prevPose[0].y = (poseWeight * humanSkeleton.pose[0].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[0].y);
+               }
+
+               // right arm
+               if (humanSkeleton.scores[2] >= thValArm) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[2].x = (poseWeight * humanSkeleton.pose[2].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[2].x);
+                       humanSkeleton.prevPose[2].y = (poseWeight * humanSkeleton.pose[2].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[2].y);
+                       if (humanSkeleton.scores[3] >= thValArm) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[3].x = (poseWeight * humanSkeleton.pose[3].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[3].x);
+                               humanSkeleton.prevPose[3].y = (poseWeight * humanSkeleton.pose[3].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[3].y);
+                               if (humanSkeleton.scores[4] >= thValArm) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[4].x = (poseWeight * humanSkeleton.pose[4].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[4].x);
+                                       humanSkeleton.prevPose[4].y = (poseWeight * humanSkeleton.pose[4].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[4].y);
+                               }
+                       }
+               }
+
+               // left arm
+               if (humanSkeleton.scores[5] >= thValArm) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[5].x = (poseWeight * humanSkeleton.pose[5].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[5].x);
+                       humanSkeleton.prevPose[5].y = (poseWeight * humanSkeleton.pose[5].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[5].y);
+                       if (humanSkeleton.scores[6] >= thValArm) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[6].x = (poseWeight * humanSkeleton.pose[6].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[6].x);
+                               humanSkeleton.prevPose[6].y = (poseWeight * humanSkeleton.pose[6].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[6].y);
+                               if (humanSkeleton.scores[7] >= thValArm) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[7].x = (poseWeight * humanSkeleton.pose[7].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[7].x);
+                                       humanSkeleton.prevPose[7].y = (poseWeight * humanSkeleton.pose[7].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[7].y);
+                               }
+                       }
+               }
+
+               // right leg
+               if (humanSkeleton.scores[8] >= thValLeg) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[8].x = (poseWeight * humanSkeleton.pose[8].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[8].x);
+                       humanSkeleton.prevPose[8].y = (poseWeight * humanSkeleton.pose[8].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[8].y);
+                       if (humanSkeleton.scores[9] >= thValLeg) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[9].x = (poseWeight * humanSkeleton.pose[9].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[9].x);
+                               humanSkeleton.prevPose[9].y = (poseWeight * humanSkeleton.pose[9].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[9].y);
+                               if (humanSkeleton.scores[10] >= thValLeg) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[10].x = (poseWeight * humanSkeleton.pose[10].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[10].x);
+                                       humanSkeleton.prevPose[10].y = (poseWeight * humanSkeleton.pose[10].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[10].y);
+                               }
+                       }
+               }
+
+               // left leg
+               if (humanSkeleton.scores[11] >= thValLeg) {
+                       // neck - right shoulder
+                       humanSkeleton.prevPose[11].x = (poseWeight * humanSkeleton.pose[11].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[11].x);
+                       humanSkeleton.prevPose[11].y = (poseWeight * humanSkeleton.pose[11].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[11].y);
+                       if (humanSkeleton.scores[12] >= thValLeg) {
+                               // right shoulder - right elbow
+                               humanSkeleton.prevPose[12].x = (poseWeight * humanSkeleton.pose[12].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[12].x);
+                               humanSkeleton.prevPose[12].y = (poseWeight * humanSkeleton.pose[12].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[12].y);
+                               if (humanSkeleton.scores[13] >= thValLeg) {
+                                       // right elbow - right wrist
+                                       humanSkeleton.prevPose[13].x = (poseWeight * humanSkeleton.pose[13].x +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[13].x);
+                                       humanSkeleton.prevPose[13].y = (poseWeight * humanSkeleton.pose[13].y +
+                                                                        prevPoseWeight * humanSkeleton.prevPose[13].y);
+                               }
+                       }
+               }
+       }
+
+       //
+       //draw..
+       // head - neck
+       if (humanSkeleton.scores[0] >= thValNeck ) {
+               cairo_move_to(cr, humanSkeleton.prevPose[0].x, humanSkeleton.prevPose[0].y);
+               cairo_line_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y);
+       }
+
+       // right arm
+       cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y);
+       if (humanSkeleton.scores[2] >= thValArm) {
+               // neck - right shoulder
+               cairo_line_to(cr, humanSkeleton.prevPose[2].x, humanSkeleton.prevPose[2].y);
+               if (humanSkeleton.scores[3] >= thValArm) {
+                       // right shoulder - right elbow
+                       cairo_line_to(cr, humanSkeleton.prevPose[3].x, humanSkeleton.prevPose[3].y);
+                       if (humanSkeleton.scores[4] >= thValArm) {
+                               // right elbow - right wrist
+                               cairo_line_to(cr, humanSkeleton.prevPose[4].x, humanSkeleton.prevPose[4].y);
+                       }
+               }
+       }
+       cairo_stroke(cr);
+
+       // left arm
+       cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y);
+       if (humanSkeleton.scores[5] >= thValArm) {
+               // neck - right shoulder
+               cairo_line_to(cr, humanSkeleton.prevPose[5].x, humanSkeleton.prevPose[5].y);
+               if (humanSkeleton.scores[6] >= thValArm) {
+                       // right shoulder - right elbow
+                       cairo_line_to(cr, humanSkeleton.prevPose[6].x, humanSkeleton.prevPose[6].y);
+                       if (humanSkeleton.scores[7] >= thValArm) {
+                               // right elbow - right wrist
+                               cairo_line_to(cr, humanSkeleton.prevPose[7].x, humanSkeleton.prevPose[7].y);
+                       }
+               }
+       }
+       cairo_stroke(cr);
+
+
+       // right leg
+       cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y);
+       if (humanSkeleton.scores[8] >= thValLeg) {
+               // neck - right shoulder
+               cairo_line_to(cr, humanSkeleton.prevPose[8].x, humanSkeleton.prevPose[8].y);
+               if (humanSkeleton.scores[9] >= thValLeg) {
+                       // right shoulder - right elbow
+                       cairo_line_to(cr, humanSkeleton.prevPose[9].x, humanSkeleton.prevPose[9].y);
+                       if (humanSkeleton.scores[10] >= thValLeg) {
+                               // right elbow - right wrist
+                               cairo_line_to(cr, humanSkeleton.prevPose[10].x, humanSkeleton.prevPose[10].y);
+                       }
+               }
+       }
+       cairo_stroke(cr);
+
+       // left leg
+       cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y);
+       if (humanSkeleton.scores[11] >= thValLeg) {
+               // neck - right shoulder
+               cairo_line_to(cr, humanSkeleton.prevPose[11].x, humanSkeleton.prevPose[11].y);
+               if (humanSkeleton.scores[12] >= thValLeg) {
+                       // right shoulder - right elbow
+                       cairo_line_to(cr, humanSkeleton.prevPose[12].x, humanSkeleton.prevPose[12].y);
+                       if (humanSkeleton.scores[13] >= thValLeg) {
+                               // right elbow - right wrist
+                               cairo_line_to(cr, humanSkeleton.prevPose[13].x, humanSkeleton.prevPose[13].y);
+                       }
+               }
+       }
+       cairo_stroke(cr);
+}
+*/
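+/* cairooverlay draw handler: renders the 21 hand landmarks as five finger
+ * polylines (thumb, index, middle, ring, little), all starting at the
+ * wrist point pose[0]. */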
+static void
+draw_overlay_hand (GstElement * overlay, cairo_t * cr, guint64 timestamp,
+    guint64 duration, gpointer user_data)
+{
+       CairoOverlayState *s = (CairoOverlayState *) user_data;
+
+       if (!s->valid) {
+		printf("not ready draw_overlay\n");
+               return;
+       }
+
+       cairo_set_source_rgba(cr, 0.1, 0.9, 0.0, 0.7);
+       cairo_set_line_width(cr, 2.0);
+
+
+       if (!humanSkeleton.IsDetected)
+               return;
+
+
+    // thumb - red
+       cairo_set_source_rgba (cr, 0.9, 0.1, 0.0, 0.7);
+       cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y);
+       for (int k = 1 ; k < 5; ++k) {
+               cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y);
+       }
+       cairo_stroke(cr);
+
+       // fore - red
+       cairo_set_source_rgba (cr, 0.9, 0.1, 0.0, 0.7);
+       cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y);
+       for (int k = 5 ; k < 9; ++k) {
+               cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y);
+       }
+       cairo_stroke(cr);
+
+	// middle - green
+       cairo_set_source_rgba (cr, 0.1, 0.9, 0.0, 0.7);
+       cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y);
+       for (int k = 9 ; k < 13; ++k) {
+               cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y);
+       }
+       cairo_stroke(cr);
+
+       // ring - blue
+       cairo_set_source_rgba (cr, 0.1, 0.0, 0.9, 0.7);
+       cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y);
+       for (int k = 13 ; k < 17; ++k) {
+               cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y);
+       }
+       cairo_stroke(cr);
+
+       // little - purple
+       cairo_set_source_rgba (cr, 0.5, 0.0, 0.5, 0.7);
+       cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y);
+       for (int k = 17 ; k < 21; ++k) {
+               cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y);
+       }
+       cairo_stroke(cr);
+}
+
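+/* GStreamer bus watch: logs end-of-stream and error messages from the
+ * pipeline. */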
+static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data)
+{
+
+  switch (GST_MESSAGE_TYPE (msg)) {
+
+    case GST_MESSAGE_EOS:
+      printf ("End of stream\n");
+      break;
+
+    case GST_MESSAGE_ERROR: {
+      gchar  *debug;
+      GError *error;
+
+      gst_message_parse_error (msg, &error, &debug);
+      g_free (debug);
+
+      printf ("Error: %s\n", error->message);
+      g_error_free (error);
+
+      break;
+    }
+    default:
+      break;
+  }
+
+  return TRUE;
+}
+
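+
+/* Engine config for the CPM body-pose model: 192x192x3 image input on the
+ * ARMNN backend (GPU target). */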
+int perform_armnn_human_pose_cpm_configure(mv_engine_config_h mv_engine_cfg)
+{
+       if (mv_engine_cfg == NULL) {
+               printf("mv_engine_cfg is null\n");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+    char *inputNodeName = "image";
+    char *outputNodeName[1] = {"Convolutional_Pose_Machine/stage_5_out"};
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_CPM_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_ARMNN);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_TARGET_TYPE,
+                        MV_INFERENCE_TARGET_GPU);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        192);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        192);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeName,
+                        1);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int perform_armnn_human_pose_hourglass_configure(mv_engine_config_h mv_engine_cfg)
+{
+       if (mv_engine_cfg == NULL) {
+               printf("mv_engine_cfg is null\n");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+    char *inputNodeName = "image";
+    char *outputNodeName[1] = {"hourglass_out_3"};
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_HOURGLASS_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_ARMNN);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_TARGET_TYPE,
+                        MV_INFERENCE_TARGET_GPU);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        192);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        192);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeName,
+                        1);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
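+/* Engine config for the first-stage AIC hand model: 224x224x3 RGB input,
+ * outputting a bounding box and 21-channel heatmaps on TFLite/CPU. */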
+int perform_tflite_hand_detection_AIC(mv_engine_config_h mv_engine_cfg)
+{
+       if (mv_engine_cfg == NULL) {
+               printf("mv_engine_cfg is null\n");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+    char *inputNodeName = "input";
+    char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"};
+
+    //outputTensorData = (void*)calloc(56*56*21, sizeof(float));
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AIC_1_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        224);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        224);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
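+/* Engine config for the second-stage AIC model: consumes the 56x56x21
+ * heatmap tensor and outputs refined coordinates and a gesture class. */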
+int perform_tflite_hand_detection_AIC2(mv_engine_config_h mv_engine_cfg)
+{
+    char *inputNodeName = "input";
+    char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"};
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AIC_2_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        56);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        56);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        21);
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
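+/* Lite variant of the first-stage hand detection model (224x224 input). */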
+int perform_tflite_hand_detection_AICLite(mv_engine_config_h mv_engine_cfg)
+{
+       if (mv_engine_cfg == NULL) {
+               printf("mv_engine_cfg is null\n");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+    char *inputNodeName = "input";
+    char *outputNodeNames[2] = {"mobilenetv2/boundingbox", "mobilenetv2/heatmap"};
+
+    //outputTensorData = (void*)calloc(56*56*21, sizeof(float));
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AICLite_1_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        224);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        224);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        3);
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
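+/*
+ * Configures the second-stage AIC-Lite hand model; like the AIC variant, it
+ * takes the filtered 56x56x21 heatmaps as a raw float32 tensor and returns
+ * refined coordinates and a gesture.
+ */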
+int perform_tflite_hand_detection_AICLite2(mv_engine_config_h mv_engine_cfg)
+{
+    if (mv_engine_cfg == NULL) {
+        printf("mv_engine_cfg is null\n");
+        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+    }
+
+    char *inputNodeName = "input";
+    char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"};
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                        PE_TFLITE_AICLite_2_WEIGHT_PATH);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_DATA_TYPE,
+                        MV_INFERENCE_DATA_FLOAT32);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_MEAN_VALUE,
+                        0.0);
+
+    mv_engine_config_set_double_attribute(mv_engine_cfg,
+                        MV_INFERENCE_MODEL_STD_VALUE,
+                        1.0);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_BACKEND_TYPE,
+                        MV_INFERENCE_BACKEND_TFLITE);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_TARGET_DEVICE_TYPE,
+                        MV_INFERENCE_TARGET_DEVICE_CPU);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                        56);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                        56);
+
+    mv_engine_config_set_int_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_TENSOR_CHANNELS,
+                        21);
+
+    mv_engine_config_set_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_INPUT_NODE_NAME,
+                        inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(mv_engine_cfg,
+                        MV_INFERENCE_OUTPUT_NODE_NAMES,
+                        outputNodeNames,
+                        2);
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
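+/*
+ * Wayland registry callback: bind the "tizen_surface" global advertised by
+ * the compositor so the window's global resource id can be queried later.
+ */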
+static void __global(void *data, struct wl_registry *registry,
+       uint32_t name, const char *interface, uint32_t version)
+{
+       struct tizen_surface **tz_surface = NULL;
+
+       if (!data) {
+               printf("NULL data\n");
+               return;
+       }
+
+       tz_surface = (struct tizen_surface **)data;
+
+       if (!interface) {
+               printf("NULL interface\n");
+               return;
+       }
+
+       if (strcmp(interface, "tizen_surface") == 0) {
+               printf("binding tizen surface for wayland\n");
+
+               *tz_surface = wl_registry_bind(registry, name, &tizen_surface_interface, 1);
+               if (*tz_surface == NULL)
+                       printf("failed to bind\n");
+
+               printf("done\n");
+       }
+
+       return;
+}
+
+static void __global_remove(void *data, struct wl_registry *wl_registry, uint32_t name)
+{
+       printf("enter\n");
+       return;
+}
+
+static const struct wl_registry_listener _wl_registry_listener = {
+       __global,
+       __global_remove
+};
+
+void __parent_id_getter(void *data, struct tizen_resource *tizen_resource, uint32_t id)
+{
+       if (!data) {
+               printf("NULL data\n");
+               return;
+       }
+
+       *((unsigned int *)data) = id;
+
+       printf("[CLIENT] got parent_id [%u] from server\n", id);
+
+       return;
+}
+
+static const struct tizen_resource_listener _tz_resource_listener = {
+       __parent_id_getter
+};
+
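+/*
+ * Resolve the global id of the window's wl_surface and hand it to the
+ * tizenwlsink overlay: window -> wl_surface -> tizen_surface ->
+ * tizen_resource -> parent_id. The pipeline is set to PLAYING only once the
+ * overlay target is known.
+ */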
+static void set_overlay(Ecore_Evas *ee)
+{
+	Ecore_Wl2_Window *window = NULL;
+	Ecore_Wl2_Display *e_wl2_display = NULL;
+
+	struct wl_display *display = NULL;
+	struct wl_display *display_wrapper = NULL;
+	struct wl_surface *surface = NULL;
+	struct wl_registry *registry = NULL;
+	struct wl_event_queue *queue = NULL;
+	struct tizen_surface *tz_surface = NULL;
+	struct tizen_resource *tz_resource = NULL;
+
+       window = ecore_evas_wayland2_window_get(ee);
+       if (!window) {
+               printf("failed to get wayland window\n");
+               goto _DONE;
+       }
+
+	/* mark this window as a video application window */
+       ecore_wl2_window_video_has(window, EINA_TRUE);
+
+       surface = (struct wl_surface *)ecore_wl2_window_surface_get(window);
+       if (!surface) {
+               printf("failed to get wayland surface\n");
+               goto _DONE;
+       }
+
+       e_wl2_display = ecore_wl2_connected_display_get(NULL);
+       if (!e_wl2_display) {
+               printf("failed to get ecore wl2 display\n");
+               goto _DONE;
+       }
+
+       display = (struct wl_display *)ecore_wl2_display_get(e_wl2_display);
+       if (!display) {
+               printf("failed to get wayland display\n");
+               goto _DONE;
+       }
+
+	display_wrapper = wl_proxy_create_wrapper(display);
+	if (!display_wrapper) {
+		printf("failed to create wl display wrapper\n");
+		goto _DONE;
+	}
+
+       queue = wl_display_create_queue(display);
+       if (!queue) {
+               printf("failed to create wl display queue\n");
+               goto _DONE;
+       }
+
+       wl_proxy_set_queue((struct wl_proxy *)display_wrapper, queue);
+
+       registry = wl_display_get_registry(display_wrapper);
+       if (!registry) {
+               printf("failed to get wayland registry\n");
+               goto _DONE;
+       }
+
+       wl_registry_add_listener(registry, &_wl_registry_listener, &tz_surface);
+
+       wl_display_dispatch_queue(display, queue);
+       wl_display_roundtrip_queue(display, queue);
+
+       if (!tz_surface) {
+               printf("failed to get tizen surface\n");
+               goto _DONE;
+       }
+
+	/* Get parent_id, which is unique across the entire system. */
+       tz_resource = tizen_surface_get_tizen_resource(tz_surface, surface);
+       if (!tz_resource) {
+		printf("failed to get tizen resource\n");
+               goto _DONE;
+       }
+
+	unsigned int parent_id = 0;
+
+	tizen_resource_add_listener(tz_resource, &_tz_resource_listener, &parent_id);
+
+	wl_display_roundtrip_queue(display, queue);
+
+	if (parent_id > 0) {
+		printf("parent id : %u\n", parent_id);
+
+		gst_video_overlay_set_wl_window_wl_surface_id(GST_VIDEO_OVERLAY(sink2), parent_id);
+		gst_element_set_state(pipeline, GST_STATE_PLAYING);
+	} else {
+		printf("failed to get parent id\n");
+	}
+
+_DONE:
+       if (tz_surface) {
+               tizen_surface_destroy(tz_surface);
+               tz_surface = NULL;
+       }
+
+       if (tz_resource) {
+               tizen_resource_destroy(tz_resource);
+               tz_resource = NULL;
+       }
+
+       if (registry) {
+               wl_registry_destroy(registry);
+               registry = NULL;
+       }
+
+       if (queue) {
+               wl_event_queue_destroy(queue);
+               queue = NULL;
+       }
+
+       if (display_wrapper) {
+               wl_proxy_wrapper_destroy(display_wrapper);
+               display_wrapper = NULL;
+       }
+}
+
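+/*
+ * Window resize callback: attach the wayland overlay to the sink on the
+ * first invocation ('st' appears to act as a run-once guard set elsewhere).
+ */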
+static void win_resize_cb(void *data, Evas *e, Evas_Object *obj, void *event_info)
+{
+	int w, h;
+
+	printf("win_resize_cb\n");
+
+	Evas *_e = evas_object_evas_get(obj);
+	Ecore_Evas *ee = ecore_evas_ecore_evas_get(_e);
+	ecore_evas_geometry_get(ee, NULL, NULL, &w, &h);
+
+	if (!st) {
+		set_overlay(ee);
+		int x, y;
+		elm_win_screen_position_get(obj, &x, &y);
+		printf("x = %d, y = %d\n", x, y);
+	} else {
+		st++;
+	}
+}
+
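+/*
+ * decodebin creates its source pads dynamically, so the file-source branch
+ * is linked at runtime: as soon as a pad appears, connect decodebin to the
+ * videoscale element (dscale).
+ */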
+static void cb_new_pad(GstElement *element, GstPad *pad, gpointer data)
+{
+	gchar *name = gst_pad_get_name(pad);
+
+	printf("A new pad %s was created for %s\n", name, GST_ELEMENT_NAME(element));
+	g_free(name);
+
+	printf("element %s will be linked to %s\n",
+		GST_ELEMENT_NAME(element),
+		GST_ELEMENT_NAME(dscale));
+	gst_element_link(element, dscale);
+}
+
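+/*
+ * Builds the full pipeline. The camera (or decoded file) stream is split by a
+ * tee into a display branch (queue2 - videoconvert - cairooverlay - sink) and
+ * an inference branch (queue1 - videorate - videoconvert - videoscale -
+ * fakesink), where the fakesink handoff feeds frames to mediavision.
+ */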
+static int app_create(void *data)
+{
+       appdata *ad = data;
+       Evas_Object *win = NULL;
+
+       // GST
+       g_mutex_init(&pose_mutex);
+
+       signal(SIGINT, int_handler);
+
+       /* initialization */
+       gst_init(NULL, NULL);
+
+       /* mediavision configuration*/
+       IsGestureMode = false;
+       if (hp_mv_engine_cfg == NULL) {
+               mv_create_engine_config(&hp_mv_engine_cfg);
+       }
+
+       if (hp_mv_engine_cfg2 == NULL) {
+               mv_create_engine_config(&hp_mv_engine_cfg2);
+       }
+
+       int err = MEDIA_VISION_ERROR_NONE;
+
+
+       if (ad->modelType == MODEL_TYPE_POSE_CPM) {
+               err = perform_armnn_human_pose_cpm_configure(hp_mv_engine_cfg);
+       } else if (ad->modelType == MODEL_TYPE_POSE_HOURGLASS) {
+               err = perform_armnn_human_pose_hourglass_configure(hp_mv_engine_cfg);
+       } else if (ad->modelType == MODEL_TYPE_POSE_HAND_AIC) {
+               outputTensorData = (void*)calloc(56*56*21, sizeof(float));
+               err = perform_tflite_hand_detection_AIC(hp_mv_engine_cfg);
+
+               err = perform_tflite_hand_detection_AIC2(hp_mv_engine_cfg2);
+       } else {
+               outputTensorData = (void*)calloc(56*56*21, sizeof(float));
+               err = perform_tflite_hand_detection_AICLite(hp_mv_engine_cfg);
+
+               err = perform_tflite_hand_detection_AICLite2(hp_mv_engine_cfg2);
+       }
+
+	if (err != MEDIA_VISION_ERROR_NONE) {
+		printf("Error on model configuration\n");
+	}
+
+       printf("configuration done\n");
+
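+	/* Two inference handles back the two-stage hand models: hp_mv_infer runs
+	 * the detector, hp_mv_infer2 the coordinate/gesture refiner fed with the
+	 * filtered heatmaps. */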
+       printf("loading pose model: ");
+       mv_inference_create(&hp_mv_infer);
+
+       mv_inference_configure(hp_mv_infer, hp_mv_engine_cfg);
+
+       clock_t start = clock();
+       mv_inference_prepare(hp_mv_infer);
+       clock_t end = clock();
+       printf("time: %2.3f\n", (double)(end - start)/CLOCKS_PER_SEC);
+
+
+       mv_inference_create(&hp_mv_infer2);
+
+       mv_inference_configure(hp_mv_infer2, hp_mv_engine_cfg2);
+
+       start = clock();
+       mv_inference_prepare(hp_mv_infer2);
+       end = clock();
+       printf("time: %2.3f\n", (double)(end - start)/CLOCKS_PER_SEC);
+
+       /* mediavision source */
+       mv_create_source(&mv_src_p);
+       mv_create_source(&mv_src_p2);
+
+       pipeline = gst_pipeline_new("app");
+
+       overlay_state = g_new0 (CairoOverlayState, 1);
+
+       /* create gstreamer elements */
+       if (!ad->filename) {
+               source = gst_element_factory_make("v4l2src", "src");
+               filter = gst_element_factory_make("capsfilter", "filter");
+       } else {
+		source = gst_element_factory_make("filesrc", "src");
+
+		decodebin = gst_element_factory_make("decodebin", "dbin");
+               dscale = gst_element_factory_make("videoscale", "dscale");
+               dconv = gst_element_factory_make("videoconvert", "dconv");
+               dsfilter = gst_element_factory_make("capsfilter", "dsfilter");
+               dcfilter = gst_element_factory_make("capsfilter", "dcfilter");
+       }
+
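+	/* Optional recording branch (used when a second filename is given):
+	 * tee2 - queue5 - videoconvert - avenc_mpeg4 - mp4mux - filesink. */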
+       if (ad->filename2) {
+               tee2 = gst_element_factory_make("tee", "tee2");
+               enc = gst_element_factory_make("avenc_mpeg4", "enc");
+               muxmp4 = gst_element_factory_make("mp4mux", "muxmp4");
+               fsink2 = gst_element_factory_make("filesink", "fsink2");
+               queue4 = gst_element_factory_make("queue", "queue4");
+               queue5 = gst_element_factory_make("queue", "queue5");
+               encconv = gst_element_factory_make("videoconvert", "encconv");
+       }
+
+
+       tee = gst_element_factory_make("tee", "tee");
+       queue1 = gst_element_factory_make("queue", "queue1");
+       queue2 = gst_element_factory_make("queue", "queue2");
+
+       if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) {
+               queue3 = gst_element_factory_make("queue", "queue3");
+       }
+
+	// queue1 - videorate - capsfilter - videoconvert - capsfilter - videoscale - capsfilter - fakesink
+       vscale = gst_element_factory_make("videoscale", "scale");
+       vsfilter = gst_element_factory_make("capsfilter", "vsfilter");
+       vconv = gst_element_factory_make("videoconvert", "convert");
+       vcfilter = gst_element_factory_make("capsfilter", "vcfilter");
+       vrate = gst_element_factory_make("videorate", "rate");
+       vrfilter = gst_element_factory_make("capsfilter", "vrfilter");
+       vrsink = gst_element_factory_make("fakesink", "vrsink");
+
+       // queue2 - videoconvert - cairooveray - tizenwlsink
+       oconv = gst_element_factory_make("videoconvert", "oconv");
+       coverlay = gst_element_factory_make("cairooverlay", "coverlay");
+       sink = gst_element_factory_make("fpsdisplaysink", "vsink");
+       sink2 = gst_element_factory_make("tizenwlsink", "vsink2");
+
+       // after detection, crop using video crop
+	// queue3 - videocrop - videoscale - capsfilter - videoconvert - capsfilter - fakesink
+
+       if (0/*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) {
+               vcrop = gst_element_factory_make("videocrop", "crop");
+               vcrscale = gst_element_factory_make("videoscale", "crscale");
+               vcrsfilter = gst_element_factory_make("capsfilter", "vcrsfilter");
+               vcrsconv = gst_element_factory_make("videoconvert", "vcrsconvert");
+               vcrscfilter = gst_element_factory_make("capsfilter", "vcrscfilter");
+               vcrssink = gst_element_factory_make("fakesink", "vcrssink");
+       }
+
+       if (!pipeline || !source ||
+               !tee || !queue1 || !vscale || !vsfilter || !vconv || !vcfilter ||
+               !vrate || !vrfilter || !vrsink ||
+               !queue2 || !oconv || !coverlay || !sink || !sink2) {
+		printf(TEXT_RED "One element (queue1 or queue2) might not be created. Exiting.\n" TEXT_RESET);
+               return -1;
+       }
+
+       if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC && (!pipeline || !queue3 
+               || !vcrop || !vcrscale || !vcrsfilter || !vcrsconv || !vcrscfilter || !vcrssink*/) {
+		printf(TEXT_RED "One element (queue3) might not be created. Exiting.\n" TEXT_RESET);
+               return -1;
+       }
+
+       if (!ad->filename) {
+               if (!filter) {
+			printf(TEXT_RED "One element might not be created. Exiting.\n" TEXT_RESET);
+                       return -1;
+               }
+               g_object_set(G_OBJECT(filter), "caps", gst_caps_from_string("video/x-raw, format=YUY2, width=640, height=480"), NULL);
+       } else {
+               if (!decodebin || !dscale || !dconv) {
+			printf(TEXT_RED "One element might not be created. Exiting.\n" TEXT_RESET);
+                       return -1;
+               }
+               g_object_set(G_OBJECT(dsfilter), "caps", gst_caps_from_string("video/x-raw, width=640, height=480"), NULL);
+               g_object_set(G_OBJECT(dcfilter), "caps", gst_caps_from_string("video/x-raw, format=YUY2, width=640, height=480"), NULL);
+               g_signal_connect (decodebin, "pad-added", G_CALLBACK (cb_new_pad), NULL);
+       }
+
+       if (ad->filename2) {
+               if (!tee2 || !enc || !muxmp4 || !queue4 || !queue5 || !fsink2 || !encconv) {
+			printf(TEXT_RED "One element might not be created. Exiting.\n" TEXT_RESET);
+                       printf("%p, %p, %p, %p, %p, %p, %p\n", tee2, enc, muxmp4, queue4, queue5, fsink2, encconv);
+                       return -1;
+               }
+               g_object_set(G_OBJECT(fsink2), "location", ad->filename2, NULL);
+
+       }
+
+       /* set up the pipeline */
+	//g_signal_connect (coverlay, "draw", G_CALLBACK (draw_overlay), overlay_state);
+	g_signal_connect (coverlay, "draw", G_CALLBACK (draw_overlay_hand), overlay_state);
+
+	g_signal_connect (coverlay, "caps-changed", G_CALLBACK (prepare_overlay), overlay_state);
+
+       if (!ad->filename) {
+               g_object_set(G_OBJECT(source), "device", "/dev/video8", NULL); // 252
+       } else {
+               g_object_set(G_OBJECT(source), "location", ad->filename, NULL);
+               g_object_set(G_OBJECT(source), "num-buffers", ad->numbuffers, NULL);
+       }
+
+       g_object_set(G_OBJECT(sink2), "use-tbm", FALSE, NULL);
+       g_object_set(G_OBJECT(sink2), "sync", FALSE, NULL);
+       g_object_set(G_OBJECT(sink), "video-sink", sink2, NULL);
+       g_object_set(G_OBJECT(sink), "sync", FALSE, NULL);
+
+       if (ad->modelType == MODEL_TYPE_POSE_HAND_AIC ||
+               ad->modelType == MODEL_TYPE_POSE_HAND_AICLite) {
+               g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=224, height=224"), NULL);
+               poseCropSize = 224;
+       } else {
+               g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=192, height=192"), NULL);
+               poseCropSize = 192;
+       }
+
+       g_object_set(G_OBJECT(vcfilter), "caps", gst_caps_from_string("video/x-raw, format=RGB"), NULL);
+       g_object_set(G_OBJECT(vrfilter), "caps", gst_caps_from_string("video/x-raw, framerate=15/1"), NULL);
+
+       //g_object_set(G_OBJECT(vrate), "drop-only", TRUE, NULL);
+
+       //g_object_set(G_OBJECT(queue2), "leaky", 2, NULL);
+#if 0
+       g_object_set(G_OBJECT(queue3), "max-size-buffers", 0, NULL);
+       g_object_set(G_OBJECT(queue3), "max-size-time", 0, NULL);
+       g_object_set(G_OBJECT(queue3), "max-size-bytes", 0, NULL);
+#endif
+       //g_object_set(G_OBJECT(queue3), "leaky", 2, NULL);
+
+	/* the fakesink handoff feeds each scaled frame to the inference code */
+	printf("vrsink signal-handoffs\n");
+       g_object_set(G_OBJECT(vrsink), "signal-handoffs", TRUE, NULL);
+       handler_p = g_signal_connect (vrsink, "handoff", G_CALLBACK(_pose_est_handoff), outputTensorData);
+       g_object_set(G_OBJECT(vrsink), "sync", FALSE, NULL);
+
+
+	if (vcrssink)
+		g_object_set(G_OBJECT(vcrssink), "sync", FALSE, NULL);
+
+
+       /*  add a message handler */
+       bus = gst_pipeline_get_bus (GST_PIPELINE(pipeline));
+       bus_watch_id = gst_bus_add_watch(bus, bus_call, NULL);
+       gst_object_unref(bus);
+
+       /* add elements into the pipeline */
+       gst_bin_add_many(GST_BIN(pipeline),
+                                       source,
+                                       tee, queue1, vscale, vsfilter, vconv, vcfilter,
+                                       vrate, vrfilter, vrsink,
+                                       queue2, oconv, coverlay, sink,
+                                       NULL);
+
+       if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) {
+               gst_bin_add_many(GST_BIN(pipeline),
+                                       queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink,
+                                       NULL);
+               gst_element_link_many (tee, queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL);
+       }
+
+       /* link elements */
+       if (!ad->filename) {
+               gst_bin_add(GST_BIN(pipeline), filter);
+               gst_element_link_many(source, filter, tee, NULL);
+       } else {
+               gst_bin_add_many(GST_BIN(pipeline), decodebin, dscale, dconv, dsfilter, dcfilter, NULL);
+               gst_element_link_many(source, decodebin, NULL);
+               gst_element_link_many(dscale, dsfilter, dconv, dcfilter, tee, NULL);
+       }
+
+
+       if (!ad->filename2) {
+               // display
+		gst_element_link_many (tee, queue2, oconv, coverlay, sink, NULL);
+               // pose
+               gst_element_link_many (tee, queue1, vrate, vrfilter, vconv, vcfilter, vscale, vsfilter, vrsink, NULL);
+       } else {
+
+		gst_bin_add_many(GST_BIN(pipeline), tee2, enc, muxmp4, fsink2, queue4, queue5, encconv, NULL);
+
+               // display
+               gst_element_link_many (tee, queue2, oconv, coverlay, tee2, NULL);
+               gst_element_link_many (tee2, queue4, sink, NULL);
+               gst_element_link_many (tee2, queue5, encconv, enc, muxmp4, fsink2, NULL);
+
+               // pose
+               gst_element_link_many (tee, queue1, vrate, vrfilter, vconv, vcfilter, vscale, vsfilter, vrsink, NULL);
+       }
+
+	/* the pipeline is started later: in set_overlay() once the overlay
+	 * target is known, or right below when the elm window path is disabled */
+	//gst_element_set_state(pipeline, GST_STATE_PLAYING);
+
+       /* loop */
+       humanSkeleton.IsDetected = false;
+       humanSkeleton.isPrevPose = false;
+       printf(TEXT_GREEN "Running.....\n" TEXT_RESET);
+       // GST_END
+#if 0
+       /* use gl backend */
+       elm_config_accel_preference_set("opengl");
+
+       /* create window */
+       //win = elm_win_add(NULL, PACKAGE, ELM_WIN_SPLASH );
+       win = elm_win_add(NULL, PACKAGE, ELM_WIN_BASIC);
+       if (win) {
+               elm_win_title_set(win, PACKAGE);
+               elm_win_borderless_set(win, EINA_TRUE);
+               elm_win_autodel_set(win, EINA_TRUE);
+               elm_win_alpha_set(win, EINA_FALSE);
+         evas_object_show(win);
+       }
+       elm_win_layer_set(win, 9);
+       elm_win_prop_focus_skip_set(win, EINA_TRUE);
+
+       ad->win = win;
+       g_win_id = win;
+       selected_win_id = g_win_id;
+
+       Evas_Object *bg = elm_bg_add(win);
+       elm_win_resize_object_add(win, bg);
+       evas_object_size_hint_min_set(bg, WIDTH, HEIGHT);
+       evas_object_size_hint_max_set(bg, WIDTH, HEIGHT);
+       evas_object_show(bg);
+
+       elm_win_activate(win);
+
+
+       evas_object_event_callback_add(win, EVAS_CALLBACK_RESIZE, win_resize_cb, NULL);
+#else
+       gst_element_set_state (pipeline, GST_STATE_PLAYING);
+#endif
+       return 0;
+}
+
+
+static int app_terminate(void *data)
+{
+       appdata *ad = data;
+       int i = 0;
+
+       /* out of loop */
+       printf(TEXT_GREEN "Stopping.....\n" TEXT_RESET);
+       gst_element_set_state(pipeline, GST_STATE_NULL);
+
+       printf(TEXT_GREEN "pipe unref.....\n" TEXT_RESET);
+       gst_object_unref(GST_OBJECT(pipeline));
+
+       printf(TEXT_GREEN "remove bus_watch id.....\n" TEXT_RESET);
+       g_source_remove(bus_watch_id);
+
+       if (overlay_state != NULL) {
+               printf(TEXT_GREEN "g_free overlay.....\n" TEXT_RESET);
+               g_free(overlay_state);
+       }
+
+       g_mutex_clear(&pose_mutex);
+       printf(TEXT_GREEN "done.....\n" TEXT_RESET);
+
+       if (g_win_id) {
+               evas_object_del(g_win_id);
+               g_win_id = NULL;
+       }
+       ad->win = NULL;
+       selected_win_id = NULL;
+
+       return 0;
+}
+
+struct appcore_ops ops = {
+       .create = app_create,
+       .terminate = app_terminate,
+};
+
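+/*
+ * Usage (mirrors the argument parsing below):
+ *   pose models (0, 1): mv_stream_infer model NeckThres ArmThres LegThres [filename [filename2 [numbuffers]]]
+ *   hand models (2, 3): mv_stream_infer model [filename2]
+ * Threshold values are given in percent, e.g. "mv_stream_infer 0 50 50 50".
+ */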
+int main (int argc, char *argv[])
+{
+       memset(&ad, 0x0, sizeof(appdata));
+       ops.data = &ad;
+
+	if (argc >= 6) {
+		ad.filename = g_strdup(argv[5]);
+		ad.numbuffers = -1;
+		printf("launch with file source (%s)\n", ad.filename);
+		if (argc > 6) {
+			ad.filename2 = g_strdup(argv[6]);
+			printf("records output (%s)\n", ad.filename2);
+		}
+		if (argc > 7) {
+			ad.numbuffers = atoi(argv[7]);
+		}
+       } else {
+               printf("launch with camera source\n");
+       }
+
+	if (argc < 2) {
+		printf("usage: mv_stream_infer model [NeckThresVal ArmThresVal LegThresVal [filename [filename2 [numbuffers]]]]\n");
+		printf("model: 0(CPM), 1(HOURGLASS), 2(AIC Hand), 3(AIC Lite Hand)\n");
+		return -1;
+	}
+
+       ad.modelType = atoi(argv[1]);
+       if (ad.modelType < 0 || ad.modelType > 3) {
+               printf("not supported model type\n");
+               return -1;
+       }
+
+	if (ad.modelType != MODEL_TYPE_POSE_HAND_AIC &&
+		ad.modelType != MODEL_TYPE_POSE_HAND_AICLite) {
+		if (argc < 5) {
+			printf("pose models require NeckThresVal, ArmThresVal and LegThresVal\n");
+			return -1;
+		}
+		thValNeck = (float)atoi(argv[2]) / 100.f;
+		thValArm = (float)atoi(argv[3]) / 100.f;
+		thValLeg = (float)atoi(argv[4]) / 100.f;
+
+               poseRoi.point.x = 50;
+               poseRoi.point.y = 0;
+               poseRoi.width = 100;
+               poseRoi.height = 192;
+       } else {
+               if (argc > 2) {
+                       ad.filename2 = g_strdup(argv[2]);
+               }
+               poseRoi.point.x = 0;
+               poseRoi.point.y = 0;
+               poseRoi.width = 0;
+               poseRoi.height = 0;
+       }
+
+       return appcore_efl_main(PACKAGE, &argc, &argv, &ops);
+}