From: Tae-Young Chung
Date: Fri, 12 Jun 2020 01:24:49 +0000 (+0900)
Subject: [testsuite/stream_infer] Support hand gesture model and add stream_infer
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8fb53bd7ce9e0dcbb18c07b2c0182e3cc4a74fec;p=platform%2Fcore%2Fapi%2Fmediavision.git

[testsuite/stream_infer] Support hand gesture model and add stream_infer

The hand gesture model consists of two models. The first takes an image
as input and outputs a hand segmentation map and heatmaps. The second
takes the filtered heatmaps, as a raw tensor buffer, as input and
outputs coordinates and a gesture. To support this, the following APIs
are added or extended:
mv_source_fill_by_tensor_buffer()
mv_inference_hand_detect()
mv_inference_pose_estimation_detect()

To test the models, the stream_infer testsuite is added.

Change-Id: Id150dd893c229e2a207e099b46f8d53b029e291f
Signed-off-by: Tae-Young Chung
---
diff --git a/include/mv_common.h b/include/mv_common.h
index dc2faf88..f1244c71 100644
--- a/include/mv_common.h
+++ b/include/mv_common.h
@@ -19,6 +19,8 @@
 #include

+#include
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@@ -245,6 +247,16 @@ int mv_source_fill_by_buffer(
 		unsigned int image_height,
 		mv_colorspace_e image_colorspace);

+int mv_source_fill_by_tensor_buffer(
+		mv_source_h source,
+		void *data_buffer,
+		mv_inference_data_type_e type, // a common type may be required; it would then be converted to the inference data type
+		unsigned int buffer_size,
+		unsigned int width,
+		unsigned int height,
+		unsigned int channel,
+		unsigned int dimension);
+
 /**
  * @brief Clears the buffer of the media source.
  *
@@ -285,6 +297,10 @@ int mv_source_get_buffer(
 		unsigned char **data_buffer,
 		unsigned int *buffer_size);

+int mv_source_get_tensor_buffer(
+		mv_source_h source,
+		void **data_buffer,
+		unsigned int *buffer_size);
 /**
  * @brief Gets height of the media source.
  *
@@ -323,6 +339,14 @@ int mv_source_get_width(
 		mv_source_h source,
 		unsigned int *image_width);

+int mv_source_get_channel(
+		mv_source_h source,
+		unsigned int *channel);
+
+int mv_source_get_dimension(
+		mv_source_h source,
+		unsigned int *dims);
+
 /**
  * @brief Gets colorspace of the media source.
  *
@@ -342,6 +366,8 @@ int mv_source_get_colorspace(
 		mv_source_h source,
 		mv_colorspace_e *image_colorspace);

+
+bool mv_source_is_tensor(mv_source_h source);
 /**
  * @brief Creates the handle to the configuration of engine.
  *
diff --git a/include/mv_inference.h b/include/mv_inference.h
index 5af41930..2b606685 100644
--- a/include/mv_inference.h
+++ b/include/mv_inference.h
@@ -724,6 +724,8 @@ int mv_inference_pose_estimation_detect(
 		mv_source_h source,
 		mv_inference_h infer,
 		mv_rectangle_s *roi,
+		float scale_width,
+		float scale_height,
 		mv_inference_pose_estimation_detected_cb detected_cb,
 		void *user_data);

diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h
index dbe0a85e..41a9e9de 100644
--- a/include/mv_inference_type.h
+++ b/include/mv_inference_type.h
@@ -51,7 +51,7 @@ typedef enum {
 * @deprecated Deprecated since 6.0. Use #mv_inference_target_device_e instead.
 * @brief Enumeration for inference target.
* - * @since_tizem 5.5 + * @since_tizen 5.5 * */ typedef enum { diff --git a/mv_common/include/MediaSource.h b/mv_common/include/MediaSource.h index 0bd5e030..c67f4b78 100644 --- a/mv_common/include/MediaSource.h +++ b/mv_common/include/MediaSource.h @@ -119,13 +119,15 @@ public: bool fill(const unsigned char *buffer, unsigned int bufferSize, unsigned int width, unsigned int height, size_t offset); + bool fill(void *buffer, mv_inference_data_type_e type, unsigned int bufferSize, + unsigned int width, unsigned int height, unsigned int channel, unsigned int dimension); /** * @brief Gets data buffer of the MediaSource. * * @since_tizen @if MOBILE 2.4 @else 3.0 @endif * @return Pointer to the data buffer. */ - unsigned char *getBuffer(void) const; + void *getBuffer(void) const; /** * @brief Gets buffer size of the MediaSource. @@ -151,6 +153,9 @@ public: */ unsigned int getHeight(void) const; + unsigned int getChannel(void) const; + + unsigned int getDimension(void) const; /** * @brief Gets image colorspace of the MediaSource. * @@ -159,8 +164,12 @@ public: */ mv_colorspace_e getColorspace(void) const; + mv_inference_data_type_e getType(void) const; + + bool getIsTensor(void) const; + private: - unsigned char *m_pBuffer; /**< The data buffer */ + void *m_pBuffer; /**< The data buffer */ unsigned int m_bufferSize; /**< The buffer size */ @@ -169,6 +178,12 @@ private: unsigned int m_height; /**< The image height */ mv_colorspace_e m_colorspace; /**< The image colorspace */ + + unsigned int m_dim; + unsigned int m_ch; + bool m_isTensor; + mv_inference_data_type_e m_type; + }; } /* Common */ diff --git a/mv_common/include/mv_common_c.h b/mv_common/include/mv_common_c.h index 234fc529..539566c1 100644 --- a/mv_common/include/mv_common_c.h +++ b/mv_common/include/mv_common_c.h @@ -108,6 +108,16 @@ int mv_source_fill_by_buffer_c( unsigned int image_height, mv_colorspace_e image_colorspace); +int mv_source_fill_by_tensor_buffer_c( + mv_source_h source, + void *data_buffer, + mv_inference_data_type_e type, // common type may be required. and then it will be converted to inference data type + unsigned int buffer_size, + unsigned int width, + unsigned int height, + unsigned int channel, + unsigned int dimension); + /** * @brief Clears the buffer of the media source. * @@ -146,6 +156,11 @@ int mv_source_get_buffer_c( unsigned char **data_buffer, unsigned int *buffer_size); +int mv_source_get_tensor_buffer_c( + mv_source_h source, + void **buffer, + unsigned int *size); + /** * @brief Gets height of the media source. * @@ -182,6 +197,14 @@ int mv_source_get_width_c( mv_source_h source, unsigned int *image_width); +int mv_source_get_channel_c( + mv_source_h source, + unsigned int *channel); + + +int mv_source_get_dimension_c( + mv_source_h source, + unsigned int *dims); /** * @brief Gets colorspace of the media source. * @@ -200,6 +223,9 @@ int mv_source_get_colorspace_c( mv_source_h source, mv_colorspace_e *image_colorspace); +bool mv_source_is_tensor_c(mv_source_h source); + + /** * @brief Creates the handle to the configuration of engine. 
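For illustration, a minimal sketch of how the new tensor path is driven from the public API. The helper name wrap_heatmap_tensor is hypothetical; the 56x56x21 FLOAT32 shape mirrors the heatmap buffer used by the testsuite below, buffer_size is in bytes as in the testsuite's call, and it is assumed that mv_common.h now pulls in the inference data-type enum (that appears to be what the new include at its top is for). Error handling is abbreviated:

#include <stdbool.h>
#include <mv_common.h>

/* Hypothetical helper: wrap a raw float heatmap in an mv_source_h. */
static int wrap_heatmap_tensor(float *heatmap, mv_source_h *out)
{
	mv_source_h src = NULL;
	int err = mv_create_source(&src);
	if (err != MEDIA_VISION_ERROR_NONE)
		return err;

	/* width/height/channel/dimension describe the tensor shape
	 * (56x56x21, 3 dimensions), matching the testsuite's usage. */
	err = mv_source_fill_by_tensor_buffer(src, heatmap,
			MV_INFERENCE_DATA_FLOAT32,
			56 * 56 * 21 * sizeof(float),
			56, 56, 21, 3);
	if (err != MEDIA_VISION_ERROR_NONE) {
		mv_destroy_source(src);
		return err;
	}

	/* The source now identifies itself as a tensor, and the raw
	 * buffer can be read back together with its metadata. */
	if (mv_source_is_tensor(src)) {
		void *data = NULL;
		unsigned int size = 0;
		mv_source_get_tensor_buffer(src, &data, &size);
	}

	*out = src;
	return MEDIA_VISION_ERROR_NONE;
}
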
* diff --git a/mv_common/src/MediaSource.cpp b/mv_common/src/MediaSource.cpp index 50f956c7..cb5fc874 100644 --- a/mv_common/src/MediaSource.cpp +++ b/mv_common/src/MediaSource.cpp @@ -29,7 +29,11 @@ MediaSource::MediaSource() : m_bufferSize(0), m_width(0), m_height(0), - m_colorspace(MEDIA_VISION_COLORSPACE_INVALID) + m_colorspace(MEDIA_VISION_COLORSPACE_INVALID), + m_dim(0), + m_ch(0), + m_isTensor(false), + m_type(MV_INFERENCE_DATA_UINT8) { } @@ -66,6 +70,7 @@ bool MediaSource::alloc(unsigned int bufferSize, "the media source %p", colorspace, this); m_colorspace = colorspace; + m_type = MV_INFERENCE_DATA_UINT8; return true; } @@ -73,7 +78,10 @@ void MediaSource::clear(void) { if (m_pBuffer != NULL) { LOGD("Delete internal buffer for media source %p", this); - delete[] m_pBuffer; + if (m_type == MV_INFERENCE_DATA_FLOAT32) + delete[] static_cast(m_pBuffer); + else + delete[] static_cast(m_pBuffer); } LOGD("Set defaults for media source %p : buffer = NULL; " "bufferSize = 0; width = 0; height = 0; " @@ -83,6 +91,10 @@ void MediaSource::clear(void) m_width = 0; m_height = 0; m_colorspace = MEDIA_VISION_COLORSPACE_INVALID; + m_ch = 0; + m_dim = 0; + m_isTensor = false; + m_type = MV_INFERENCE_DATA_UINT8; } bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize, @@ -121,6 +133,7 @@ bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize, LOGD("Assign new colorspace (%i) of the internal buffer image for " "the media source %p", colorspace, this); m_colorspace = colorspace; + m_type = MV_INFERENCE_DATA_UINT8; return true; } @@ -144,18 +157,74 @@ bool MediaSource::fill(const unsigned char *buffer, unsigned int bufferSize, LOGD("Copy data from external buffer (%p) to the internal buffer (%p + %zd) of " "media source %p", buffer, m_pBuffer, offset, this); - std::memcpy(m_pBuffer + offset, buffer, bufferSize); + std::memcpy(static_cast(m_pBuffer) + offset, buffer, bufferSize); LOGD("size is %ui x %ui [%ui] on buffer(%p).", width, height, bufferSize, this); + m_type = MV_INFERENCE_DATA_UINT8; + + return true; +} + +bool MediaSource::fill(void *buffer, mv_inference_data_type_e type, unsigned int bufferSize, + unsigned int width, unsigned int height, unsigned int channel, unsigned int dimension) +{ + if (bufferSize == 0 || buffer == NULL) + return false; + + LOGD("Call clear() first for media source %p", this); + clear(); + + LOGD("Allocate memory [%i] for buffer in media source %p", bufferSize, this); + LOGD("Assign new size (%ui x %ui) of the internal buffer image for " + "the media source %p", width, height, this); + LOGD("Assign new buffer with type (%i) of the internal buffer for " + "the source %p", type, this); + if (type == MV_INFERENCE_DATA_FLOAT32) { + m_pBuffer = new (std::nothrow)float[bufferSize]; + } else if (type == MV_INFERENCE_DATA_UINT8) { + m_pBuffer = new (std::nothrow)unsigned char[bufferSize]; + } + + if (m_pBuffer == NULL) { + LOGE("Memory allocating for buffer in media source %p failed!", this); + return false; + } + + LOGD("Copy data from external buffer (%p) to the internal buffer (%p) of " + "media source %p", buffer, m_pBuffer, this); + std::memcpy(m_pBuffer, buffer, bufferSize); + + LOGD("Assign new size of the internal buffer of media source %p. 
" + "New size is %ui.", this, bufferSize); + m_bufferSize = bufferSize; + + LOGD("Assign new size (%ui x %ui x %ui) of the internal buffer image for " + "the media source %p", width, height, channel, this); + m_width = width; + m_height = height; + m_ch = channel; + m_dim = dimension; + m_isTensor = true; + m_type = type; + + LOGD("Assign new colorspace (%i) of the internal buffer image for " + "the media source %p", MEDIA_VISION_COLORSPACE_INVALID, this); + m_colorspace = MEDIA_VISION_COLORSPACE_INVALID; + return true; } -unsigned char *MediaSource::getBuffer(void) const +void *MediaSource::getBuffer(void) const { return m_pBuffer; } +mv_inference_data_type_e MediaSource::getType(void) const +{ + return m_type; +} + unsigned int MediaSource::getBufferSize(void) const { return m_bufferSize; @@ -171,10 +240,24 @@ unsigned int MediaSource::getHeight(void) const return m_height; } +unsigned int MediaSource::getChannel(void) const +{ + return m_ch; +} + +unsigned int MediaSource::getDimension(void) const +{ + return m_dim; +} + mv_colorspace_e MediaSource::getColorspace(void) const { return m_colorspace; } +bool MediaSource::getIsTensor(void) const +{ + return m_isTensor; +} } /* Common */ } /* MediaVision */ diff --git a/mv_common/src/mv_common.c b/mv_common/src/mv_common.c index c01536d0..283a19de 100644 --- a/mv_common/src/mv_common.c +++ b/mv_common/src/mv_common.c @@ -80,6 +80,29 @@ int mv_source_fill_by_buffer( return ret; } +int mv_source_fill_by_tensor_buffer( + mv_source_h source, + void *data_buffer, + mv_inference_data_type_e type, + unsigned int buffer_size, + unsigned int width, + unsigned int height, + unsigned int channel, + unsigned int dimension) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_fill_by_tensor_buffer_c( + source, data_buffer, type, buffer_size, width, height, + channel, dimension); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_clear( mv_source_h source) { @@ -110,6 +133,23 @@ int mv_source_get_buffer( return ret; } +int mv_source_get_tensor_buffer( + mv_source_h source, + void **data_buffer, + unsigned int *buffer_size) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + MEDIA_VISION_NULL_ARG_CHECK(buffer_size); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_tensor_buffer_c(source, data_buffer, buffer_size); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_height( mv_source_h source, unsigned int *image_height) @@ -140,6 +180,36 @@ int mv_source_get_width( return ret; } +int mv_source_get_channel( + mv_source_h source, + unsigned int *channel) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(channel); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_channel_c(source, channel); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_source_get_dimension( + mv_source_h source, + unsigned int *dims) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(dims); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_dimension_c(source, dims); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_colorspace( 
 		mv_source_h source,
 		mv_colorspace_e *image_colorspace)
@@ -155,6 +225,19 @@ int mv_source_get_colorspace(
 	return ret;
 }

+bool mv_source_is_tensor(mv_source_h source)
+{
+	MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(source);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+	bool ret = mv_source_is_tensor_c(source);
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return ret;
+}
+
+
 int mv_create_engine_config(
 		mv_engine_config_h *engine_cfg)
 {
diff --git a/mv_common/src/mv_common_c.cpp b/mv_common/src/mv_common_c.cpp
index 28ba8382..50b9d860 100644
--- a/mv_common/src/mv_common_c.cpp
+++ b/mv_common/src/mv_common_c.cpp
@@ -232,6 +232,34 @@ int mv_source_fill_by_buffer_c(
 	return MEDIA_VISION_ERROR_NONE;
 }

+int mv_source_fill_by_tensor_buffer_c(
+		mv_source_h source,
+		void *data_buffer,
+		mv_inference_data_type_e type,
+		unsigned int buffer_size,
+		unsigned int width,
+		unsigned int height,
+		unsigned int channel,
+		unsigned int dimension)
+{
+	if (!source || buffer_size == 0 || data_buffer == NULL) {
+		LOGE("Media source can't be filled by tensor buffer because "
+			"the source or data_buffer is NULL, or buffer_size = 0. "
+			"source = %p; data_buffer = %p; buffer_size = %u",
+			source, data_buffer, buffer_size);
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	if (!(static_cast(source))->fill(data_buffer,
+		type, buffer_size, width, height, channel, dimension)) {
+		LOGE("mv_source_h filling from tensor buffer failed");
+		return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+	}
+
+	LOGD("Media source has been filled from buffer");
+	return MEDIA_VISION_ERROR_NONE;
+}
+
 int mv_source_clear_c(
 		mv_source_h source)
 {
@@ -258,6 +286,25 @@ int mv_source_get_buffer_c(
 	}

 	LOGD("Get media vision source [%p] buffer and buffer size to be returned", source);
+	*buffer = static_cast((static_cast(source))->getBuffer());
 	*size = (static_cast(source))->getBufferSize();
 	LOGD("Media vision source [%p] buffer (%p) and buffer size (%ui) has been returned", source, buffer, *size);

 	return MEDIA_VISION_ERROR_NONE;
 }

+int mv_source_get_tensor_buffer_c(
+		mv_source_h source,
+		void **buffer,
+		unsigned int *size)
+{
+	if (!source) {
+		LOGE("Impossible to get buffer for NULL mv_source_h handle");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	LOGD("Get media vision source [%p] buffer and buffer size to be returned", source);
+	*buffer = (static_cast(source))->getBuffer();
 	*size = (static_cast(source))->getBufferSize();
 	LOGD("Media vision source [%p] buffer (%p) and buffer size (%ui) has been returned", source, buffer, *size);
@@ -265,6 +312,7 @@ int mv_source_get_buffer_c(
 	return MEDIA_VISION_ERROR_NONE;
 }

+
 int mv_source_get_height_c(
 		mv_source_h source,
 		unsigned int *height)
@@ -297,6 +345,39 @@ int mv_source_get_width_c(
 	return MEDIA_VISION_ERROR_NONE;
 }

+int mv_source_get_channel_c(
+		mv_source_h source,
+		unsigned int *channel)
+{
+	if (!source) {
+		LOGE("Impossible to get channel for NULL mv_source_h handle");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	LOGD("Get media vision source [%p] channel to be returned", source);
+	*channel = (static_cast(source))->getChannel();
+	LOGD("Media vision source [%p] channel (%ui) has been returned", source, *channel);
+
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_source_get_dimension_c(
+		mv_source_h source,
+		unsigned int *dims)
+{
+	if (!source) {
+		LOGE("Impossible to get dimension for NULL mv_source_h handle");
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
+	LOGD("Get media vision source [%p] dims to be
returned", source); + *dims = (static_cast(source))->getDimension(); + LOGD("Media vision source [%p] dims (%ui) has been returned", source, *dims); + + return MEDIA_VISION_ERROR_NONE; +} + + int mv_source_get_colorspace_c( mv_source_h source, mv_colorspace_e *colorspace) @@ -313,6 +394,23 @@ int mv_source_get_colorspace_c( return MEDIA_VISION_ERROR_NONE; } +bool mv_source_is_tensor_c(mv_source_h source) +{ + // int mv_source_is_tensor_c(mv_source_h source, bool *isTensor) + /* + if (!source) { + LOGE("mv_source_h handle is NULL"); + return ; + } + */ + + LOGD("Get media vision source [%p]", source); + bool ret = (static_cast(source))->getIsTensor(); + LOGD("Media vision source [%p] is %s", source, ret ? "tensor" : "not tensor"); + + return ret; +} + int mv_create_engine_config_c( mv_engine_config_h *engine_cfg) { diff --git a/mv_inference/inference/CMakeLists.txt b/mv_inference/inference/CMakeLists.txt index 362bc9fb..0b8522b2 100644 --- a/mv_inference/inference/CMakeLists.txt +++ b/mv_inference/inference/CMakeLists.txt @@ -28,7 +28,7 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXXFLAGS} -Wno-unused-parameter FILE(GLOB MV_INFERENCE_INCLUDE_LIST "${PROJECT_SOURCE_DIR}/include/*.h" "${PROJECT_SOURCE_DIR}/include/*.hpp") FILE(GLOB MV_INFERENCE_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") -FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc) +FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc imgcodecs) if(NOT OpenCV_FOUND) MESSAGE(SEND_ERROR "OpenCV NOT FOUND") RETURN() diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index 4c126ce9..7422a0ab 100755 --- a/mv_inference/inference/include/Inference.h +++ b/mv_inference/inference/include/Inference.h @@ -62,13 +62,15 @@ typedef struct _FacialLandMarkDetectionResults { typedef struct _PoseEstimationResults { int number_of_pose_estimation; - std::vector locations; + std::vector locations; + int gesture; } PoseEstimationResults; /**< structure PoseEstimationResults */ typedef struct _HandDetectionResults { int number_of_hands; std::vector confidences; std::vector locations; + void* outputTensorData; } HandDetectionResults; /**< structure HandDetectionResults */ namespace mediavision { @@ -345,6 +347,8 @@ private: cv::Size mSourceSize; cv::Mat mInputBuffer; + cv::Mat mHeatMapMatrix; + mv_engine_config_h engine_config; InferenceEngineCommon * mBackend; diff --git a/mv_inference/inference/include/mv_inference_open.h b/mv_inference/inference/include/mv_inference_open.h index 3bdc5593..20c7d7c8 100755 --- a/mv_inference/inference/include/mv_inference_open.h +++ b/mv_inference/inference/include/mv_inference_open.h @@ -530,6 +530,8 @@ int mv_inference_pose_estimation_detect_open( mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data); diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index c7b5d2f2..da88c532 100755 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -17,7 +17,7 @@ #include "mv_private.h" #include "Inference.h" #include "InferenceIni.h" - +#include #include #include @@ -686,6 +686,7 @@ int Inference::FillOutputResult(tensor_t &outputData) } outputData.data.push_back((void *)mOutputTensorBuffers[i].buffer); + LOGI("%p", mOutputTensorBuffers[i].buffer); } return MEDIA_VISION_ERROR_NONE; @@ -870,9 +871,9 @@ int Inference::Run(std::vector &mvSources, 
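/* Data handoff between the two models in this patch: GetHandDetectionResults()
 * masks the heatmap output with the segmented hand region, merges the channels
 * into mHeatMapMatrix and publishes its raw pointer through
 * HandDetectionResults::outputTensorData; mv_inference_hand_detect_open() then
 * copies 56x56x21 floats into the caller-provided user_data, which the caller
 * wraps with mv_source_fill_by_tensor_buffer() and feeds to the second model. */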
std::vector &mvSources, std::vector(&buffer), &bufferSize)) + return MEDIA_VISION_ERROR_INTERNAL; - // TODO. Let's support various color spaces. + // TODO. Let's support various color spaces. - if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) { - LOGE("Not Supported format!\n"); - return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; - } + if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) { + LOGE("Not Supported format!\n"); + return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; + } - if (roi == NULL) { - cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer).clone(); - } else { - cvRoi.x = roi->point.x; - cvRoi.y = roi->point.y; - cvRoi.width = (roi->point.x + roi->width) >= width ? width - roi->point.x : roi->width; - cvRoi.height = (roi->point.y + roi->height) >= height ? height - roi->point.y : roi->height; - cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer)(cvRoi).clone(); - } + if (roi == NULL) { + cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer).clone(); + } else { + cvRoi.x = roi->point.x; + cvRoi.y = roi->point.y; + cvRoi.width = (roi->point.x + roi->width) >= width ? width - roi->point.x : roi->width; + cvRoi.height = (roi->point.y + roi->height) >= height ? height - roi->point.y : roi->height; + cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), buffer)(cvRoi).clone(); + } - LOGE("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height); + LOGE("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height); - if (mCh != 1 && mCh != 3) { - LOGE("Channel not supported."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; + if (mCh != 1 && mCh != 3) { + LOGE("Channel not supported."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } } + + std::vector::iterator iter; for (iter = mInputTensorBuffers.begin(); iter != mInputTensorBuffers.end(); iter++) { inference_engine_tensor_buffer tensor_buffer = *iter; - int data_type = ConvertToCv(tensor_buffer.data_type); + if (mv_source_is_tensor(mvSource)) { + memcpy(tensor_buffer.buffer, buffer, bufferSize); + } else { - // Convert color space of input tensor data and then normalize it. - ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, data_type, tensor_buffer.buffer), data_type); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to preprocess input tensor data."); - return ret; + int data_type = ConvertToCv(tensor_buffer.data_type); + + // Convert color space of input tensor data and then normalize it. 
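+			// Two input paths from here on: a tensor source was already
+			// copied verbatim into the input tensor buffer by the memcpy
+			// branch above, while an image source goes through Preprocess()
+			// below, which converts color space, resizes and normalizes
+			// into the backend's expected input layout.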
+ ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, data_type, tensor_buffer.buffer), data_type); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to preprocess input tensor data."); + return ret; + } } } @@ -1262,6 +1279,7 @@ int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectio std::vector> inferDimInfo(outputData.dimInfo); std::vector inferResults(outputData.data.begin(), outputData.data.end()); +#if 0 long number_of_pose = inferDimInfo[0][3]; float * tmp = static_cast(inferResults[0]); cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); @@ -1297,6 +1315,51 @@ int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectio *detectionResults = results; LOGE("Inference: PoseEstimationResults: %d\n", results.number_of_pose_estimation); + + #else + for (int k = 0; k < inferDimInfo.size(); ++k) { + LOGI("output: %d", k); + LOGI("addr: %p", inferResults[k]); + for(int d = 0; d < inferDimInfo[k].size(); ++d) { + LOGI("%d", inferDimInfo[k][d]); + } + LOGI("\n"); + } + + //float* coordsPtr = reinterpret_cast(inferResults[0]); + int64_t * gesturePtr = reinterpret_cast(inferResults[1]); + //LOGI("%lld", gesturePtr[0]); + cv::Mat gestureOut(cv::Size(inferDimInfo[1][0], inferDimInfo[1][1]), CV_8UC(sizeof(int64_t)), gesturePtr); + cv::Mat gestureReshaped = gestureOut.reshape(sizeof(int64_t), inferDimInfo[1][0]); + cv::Mat gestureArr[8]; + cv::split(gestureReshaped, gestureArr); + int gesture = gestureArr[0].at(0); + + //float ratioX = (float)mSourceSize.width;; + //float ratioY = (float)mSourceSize.height; + + int64_t * coordPtr = reinterpret_cast(inferResults[0]); + long number_of_pose = inferDimInfo[0][1]; + cv::Mat coordOut(cv::Size(inferDimInfo[0][0], inferDimInfo[0][1]), CV_32FC(inferDimInfo[0][2]), coordPtr); + + PoseEstimationResults results; + results.number_of_pose_estimation = 0; + results.gesture = (int)gesture; + + for (int poseIdx = 0; poseIdx < number_of_pose; poseIdx++) { + cv::Point2f loc; + cv::Vec2f coord = coordOut.at(0,poseIdx); + LOGI("PoseIdx[%2d]: x[%2f], y[%2f]", poseIdx, coord[0], coord[1]); + + loc.x = coord[0]; + loc.y = coord[1]; + results.locations.push_back(loc); + results.number_of_pose_estimation++; + } + + *detectionResults = results; + + #endif return MEDIA_VISION_ERROR_NONE; } @@ -1311,88 +1374,186 @@ int Inference::GetHandDetectionResults(HandDetectionResults *detectionResults) return ret; } - // In case of object detection, - // a model may apply post-process but others may not. - // Thus, those cases should be hanlded separately. +#if 1 std::vector> inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - std::vector inferResults(outputData.data.begin(), outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); - float* boxes = nullptr; - float* classes = nullptr; - float* scores = nullptr; - int number_of_detections = 0; + for (int k = 0; k < inferDimInfo.size(); ++k) { + LOGI("output: %d", k); + LOGI("addr: %p", inferResults[k]); + for(int d = 0; d < inferDimInfo[k].size(); ++d) { + LOGI("%d", inferDimInfo[k][d]); + } + LOGI("\n"); + } + HandDetectionResults results; + results.number_of_hands = 0; - cv::Mat cvScores, cvClasses, cvBoxes; - if (outputData.dimInfo.size() == 1) { - // there is no way to know how many objects are detect unless the number of objects aren't - // provided. In the case, each backend should provide the number of results manually. 
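	// (The SSD-style parsing below is superseded: under the new #if 1 path,
	// output 0 is an int64 segmentation/bbox map and output 1 a float heatmap;
	// the map is turned into per-hand markers and the heatmap is masked per hand.)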
- // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are - // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 - // indicats the image id. But it is useless if a batch mode isn't supported. - // So, use the 1st of 7. + // bbox + int64_t * bboxPtr = reinterpret_cast(inferResults[0]); + float* heatmapPtr = reinterpret_cast(inferResults[1]); - number_of_detections = (int)(*reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], CV_32F, outputData.data[0]); + double number_of_results = 0; + double maxValue = 0.0; + cv::Mat kernel = cv::Mat::ones(2,2, CV_8UC1); - // boxes - cv::Mat cvLeft = cvOutputData.col(3).clone(); - cv::Mat cvTop = cvOutputData.col(4).clone(); - cv::Mat cvRight = cvOutputData.col(5).clone(); - cv::Mat cvBottom = cvOutputData.col(6).clone(); + //int maskSz[] = {inferDimInfo[1][2], inferDimInfo[1][1], inferDimInfo[1][3]}; + //int heatMapSz[] = {inferDimInfo[1][2], inferDimInfo[1][1], inferDimInfo[1][3]}; + //dj + cv::Mat bboxOut(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), CV_8UC(sizeof(int64_t)), bboxPtr); + cv::Mat bboxReshaped = bboxOut.reshape(sizeof(int64_t), inferDimInfo[1][2]); + cv::Mat bboxArr[8]; + cv::split(bboxReshaped, bboxArr); + cv::Mat bbox = bboxArr[0]; - cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; - cv::hconcat(cvBoxElems, 4, cvBoxes); + bbox = bbox*255; - // classes - cvClasses = cvOutputData.col(1).clone(); + ////cv::imwrite("/tmp/dumpbbox.jpg",bbox); + cv::Mat bboxOpened, bboxClosed; + cv::Mat backGround, distTransformed, foreGroundF, foreGroundU; + cv::Mat unKnown, markers; - // scores - cvScores = cvOutputData.col(2).clone(); + // opening + cv::morphologyEx(bbox, bboxOpened, cv::MORPH_OPEN, kernel); + // dilate + cv::dilate(bboxOpened, backGround, kernel); - boxes = cvBoxes.ptr(0); - classes = cvClasses.ptr(0); - scores = cvScores.ptr(0); + // get euclidean distance by distance transform + cv::distanceTransform(bboxOpened, distTransformed, cv::DIST_L2, 5); // euclidean dist. 
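+	// Marker preparation follows the classic watershed-style recipe: opening
+	// removes speckle, dilating the opened mask gives sure-background,
+	// thresholding the distance transform at 50% of its maximum gives
+	// sure-foreground, their difference is the unknown region, and
+	// connectedComponents() labels one marker per hand blob.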
- } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); - number_of_detections = (int)(*reinterpret_cast(inferResults[3])); - } + cv::minMaxLoc(distTransformed, NULL, &maxValue, NULL, NULL); + LOGI("max value of distTransformed: %f", maxValue); + cv::threshold(distTransformed, foreGroundF, 0.5*maxValue, 255, cv::THRESH_BINARY); - int left, top, right, bottom; - cv::Rect loc; + LOGI("type of foreGround: %d\n", foreGroundF.type()); //#define CV_8F 5 + LOGI("type of backGround: %d\n", backGround.type()); //#define CV_8U 0 + foreGroundF.convertTo(foreGroundU, CV_8U); + LOGI("type of backGround: %d\n", foreGroundU.type()); //#define CV_8U 0 + cv::subtract(backGround, foreGroundU , unKnown); + LOGI("type of unKnown: %d\n", unKnown.type()); // - HandDetectionResults results; - results.number_of_hands = 0; - for (int idx = 0; idx < number_of_detections; ++idx) { - if (scores[idx] < mThreshold) - continue; - left = (int)(boxes[idx*4 + 1] * mSourceSize.width); - top = (int)(boxes[idx*4 + 0] * mSourceSize.height); - right = (int)(boxes[idx*4 + 3] * mSourceSize.width); - bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height); + cv::connectedComponents(foreGroundU, markers); - loc.x = left; - loc.y = top; - loc.width = right -left + 1; - loc.height = bottom - top + 1; + markers += 1; - results.confidences.push_back(scores[idx]); - results.locations.push_back(loc); - results.number_of_hands++; + markers.setTo(0, markers==255); - LOGI("confidence:%f", scores[idx]); - LOGI("class: %f", classes[idx]); - LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx*4 + 1], boxes[idx*4 + 0], boxes[idx*4 + 3], boxes[idx*4 + 2]); - LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom); + cv::minMaxLoc(markers, NULL, &number_of_results, NULL, NULL); + //results.number_of_pose_estimation = static_cast(number_of_results)-1; + LOGI("number_of_results: %d", static_cast(number_of_results)-1); + + //cv::Mat heatMap = cv::Mat(3, heatMapSz, CV_32FC1, heatmapPtr); + cv::Mat heatMap = cv::Mat(cv::Size(inferDimInfo[1][2],inferDimInfo[1][1]), CV_32FC(inferDimInfo[1][3]), heatmapPtr); + cv::Mat heatMapReshape = heatMap.reshape(inferDimInfo[1][3], inferDimInfo[1][2]); + cv::Mat heatMapReshapeArr[inferDimInfo[1][3]]; + cv::Mat heatMapMatrixArr[inferDimInfo[1][3]]; + cv::split(heatMapReshape, heatMapReshapeArr); + + + cv::Mat mask = cv::Mat::zeros(inferDimInfo[0][2], inferDimInfo[0][1], CV_8UC1); + cv::Mat maskImg = cv::Mat::zeros(inferDimInfo[0][2], inferDimInfo[0][1], CV_8UC1); + cv::Mat kernel2 = cv::Mat::ones(3,3, CV_8UC1); + + + float ratioX = (float)mSourceSize.width / (float)inferDimInfo[0][2]; + float ratioY = (float)mSourceSize.height / (float)inferDimInfo[0][1]; + for (int resultIdx = 0; resultIdx < (static_cast(number_of_results)-1); resultIdx++) { + mask.setTo(1, markers==(resultIdx + 2)); + + cv::Mat maskIdx; + findNonZero(mask, maskIdx); + int numIdx = maskIdx.total(); + LOGI("type of maskIdx: %d, and ch: %d, total: %d points\n", mask.type(), mask.channels(), numIdx ); + int minX, minY; + int maxX, maxY; + int maxBBSize = 0; + int maxBBSizeIdx = -1; + minX = minY = 100; + maxX = maxY = -1; + + for (int idx = 0; idx < numIdx; ++idx) { + //LOGI("%d, %d", mask.at(0).x, mask.at(0).y); + if (maskIdx.at(idx).x < minX) { + minX = maskIdx.at(idx).x; + } + + if (maskIdx.at(idx).y < minY) { + minY = maskIdx.at(idx).y; + } + + if (maskIdx.at(idx).x >= maxX) { + maxX = maskIdx.at(idx).x; + } + + if (maskIdx.at(idx).y >= 
maxY) { + maxY = maskIdx.at(idx).y; + } + } + + int bbSize = (maxX - minX) * (maxY - maxX); + if (bbSize > maxBBSize) { + maxBBSize = bbSize; + maxBBSizeIdx = resultIdx; + + minX = (int)((float)minX * ratioX); + maxX = (int)((float)maxX * ratioX); + minY = (int)((float)minY * ratioY); + maxY = (int)((float)maxY * ratioY); + results.locations.push_back(cv::Rect(minX, minY, (maxX - minX +1), (maxY-minY +1))); + } + + LOGI("(%d,%d) - (%d,%d): size %d(idx:%d)", minX, minY, maxX, maxY, maxBBSize, maxBBSizeIdx); + + cv::dilate(mask, maskImg, kernel2, cv::Point(-1,-1), 4); + + /* + cv::Mat maskImg3d(3, maskSz, CV_32FC1); + + //cv::Mat maskImg3d = maskImg.reshape(1, 3, sz); + + for (int d1 = 0; d1 < inferDimInfo[1][2]; ++d1) { + for (int d2 = 0; d2 < inferDimInfo[1][1]; ++d2) { + for (int d3 = 0; d3 < inferDimInfo[0][3]; ++d3) { + maskImg3d.at(d1, d2, d3) = maskImg.at(d1, d2); + } + } + } + */ + + cv::Mat maskImgF; + maskImg.convertTo(maskImgF, CV_32FC1); + for (int ch = 0; ch < inferDimInfo[1][3]; ++ch ) { + cv::multiply(heatMapReshapeArr[ch], maskImgF, heatMapMatrixArr[ch]); + //char dumpName[1024]; + + //snprintf(dumpName, 1024, "/tmp/heatmapDump_%d.csv", k); + double maxVal; + minMaxLoc(heatMapMatrixArr[ch], NULL, &maxVal, NULL, NULL); + LOGI("%d: %f", ch, maxVal); + /* + snprintf(dumpName, 1024, "/tmp/heatmapDump_%d.csv", k); + LOGI("%s", dumpName); + std::ofstream dumpFile; + dumpFile.open(dumpName); + LOGI("tempArr shape: %dx%d", tempArr[k].size[0], tempArr[k].size[1]); + dumpFile << cv::format(tempArr[k], cv::Formatter::FMT_CSV) << std::endl; + dumpFile.close(); + */ + } + + cv::merge(heatMapMatrixArr, inferDimInfo[1][3], mHeatMapMatrix); } + LOGE("heatmapMatrix: type[%d], size[%d], elemSize[%d]", mHeatMapMatrix.type(), + mHeatMapMatrix.total(), mHeatMapMatrix.elemSize()); + + results.number_of_hands = static_cast(number_of_results)-1; + results.outputTensorData = mHeatMapMatrix.ptr(); + LOGE("mHeatMapMatrix: %p", results.outputTensorData); *detectionResults = results; +#endif + LOGE("Inference: GetHandDetectionResults: %d\n", results.number_of_hands); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_inference/inference/src/mv_inference.c b/mv_inference/inference/src/mv_inference.c index c08339c8..19fe9c83 100755 --- a/mv_inference/inference/src/mv_inference.c +++ b/mv_inference/inference/src/mv_inference.c @@ -319,6 +319,8 @@ int mv_inference_pose_estimation_detect( mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data) { @@ -336,8 +338,8 @@ int mv_inference_pose_estimation_detect( ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data); */ #else - - ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data); + LOGE("%p", user_data); + ret = mv_inference_pose_estimation_detect_open(source, infer, roi, scale_width, scale_height, detected_cb, user_data); MEDIA_VISION_FUNCTION_LEAVE(); diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp index 2de002a2..957bbd7c 100755 --- a/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_inference/inference/src/mv_inference_open.cpp @@ -23,6 +23,9 @@ #include #include +#include +#include + using namespace mediavision::inference; static int check_mv_inference_engine_version(mv_engine_config_h engine_config, bool *is_new_version) @@ -811,6 +814,8 @@ int mv_inference_pose_estimation_detect_open( mv_source_h source, 
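 		/* scale_width/scale_height map the model-space joint coordinates back
 		 * to display space: each reported x/y below is multiplied by them
 		 * (the testsuite passes 225.f for its 225x225 dump image, and
 		 * 1.f/1.f for an identity mapping). */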
mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data) { @@ -843,12 +848,40 @@ int mv_inference_pose_estimation_detect_open( std::vector locations(numberOfPoseEstimation); + unsigned int tmpWidth = 0; + unsigned int tmpHeight = 0; + unsigned char *buffer = NULL; + unsigned int size = 0; + + mv_source_h* tmpSource = (mv_source_h*)(user_data); + cv::Mat dumpMap; + if(user_data) { + mv_source_get_width(*tmpSource, &tmpWidth); + mv_source_get_height(*tmpSource, &tmpHeight); + LOGE("%d, %d", tmpWidth, tmpHeight); + mv_source_get_buffer(*tmpSource, &buffer, &size); + LOGE("%p", buffer); + dumpMap = cv::Mat(cv::Size(225,225), CV_8UC3, buffer); + } else { + LOGE("user_data is NULL"); + } + for (int n = 0; n < numberOfPoseEstimation; ++n) { - locations[n].x = poseEstimationResults.locations[n].x; - locations[n].y = poseEstimationResults.locations[n].y; - } + locations[n].x = (int)(poseEstimationResults.locations[n].x * scale_width); + locations[n].y = (int)(poseEstimationResults.locations[n].y * scale_height); + cv::Point point((int)(poseEstimationResults.locations[n].x * scale_width), + (int)(poseEstimationResults.locations[n].y * scale_height)); + //cv::drawMarker(dumpMap, point, cv::Scalar(0,255,0), cv::MARKER_DIAMOND, ); + if(user_data) { + cv::circle(dumpMap, point, 1, cv::Scalar(0,255,0), 2); + } + } + if (user_data) { + cv::cvtColor(dumpMap, dumpMap, cv::COLOR_RGB2BGR); + cv::imwrite("/tmp/dumpOut.jpg", dumpMap); + } detected_cb(source, numberOfPoseEstimation, locations.data(), user_data); return ret; @@ -884,17 +917,32 @@ int mv_inference_hand_detect_open( numberOfOutputs = handDetectionResults.number_of_hands; + LOGW("numberOfOutputs: %d", numberOfOutputs); float *confidences = handDetectionResults.confidences.data(); + LOGW("done"); std::vector locations(numberOfOutputs); - for (int n = 0; n < numberOfOutputs; ++n) { - locations[n].point.x = handDetectionResults.locations[n].x; - locations[n].point.y = handDetectionResults.locations[n].y; - locations[n].width = handDetectionResults.locations[n].width; - locations[n].height = handDetectionResults.locations[n].height; + LOGE("user_data:%p", user_data); + LOGE("outputTensorData:%p", handDetectionResults.outputTensorData); + if (user_data) { + locations.clear(); + std::vector().swap(locations); + if (handDetectionResults.outputTensorData) { + LOGW("try to get outputTensorData: %zd", sizeof(float)); + memcpy(user_data, handDetectionResults.outputTensorData, sizeof(float)*(56*56*21)); + } else { + LOGW("outputTensorData is NULL"); + } + } else { + for (int n = 0; n < numberOfOutputs; ++n) { + locations[n].point.x = handDetectionResults.locations[n].x; + locations[n].point.y = handDetectionResults.locations[n].y; + locations[n].width = handDetectionResults.locations[n].width; + locations[n].height = handDetectionResults.locations[n].height; + } } - detected_cb(source, numberOfOutputs, confidences, locations.data(), user_data); + detected_cb(source, numberOfOutputs, confidences, user_data == NULL ? 
locations.data() : NULL, user_data); return ret; -} \ No newline at end of file +} diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 2fdf73f4..9e15c792 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -25,11 +25,22 @@ BuildRequires: libavutil-devel BuildRequires: pkgconfig(gstreamer-1.0) BuildRequires: pkgconfig(gstreamer-base-1.0) BuildRequires: pkgconfig(gstreamer-app-1.0) +BuildRequires: pkgconfig(gstreamer-video-1.0) +BuildRequires: pkgconfig(cairo) BuildRequires: pkgconfig(libtzplatform-config) BuildRequires: pkgconfig(iniparser) BuildRequires: pkgconfig(ncurses) BuildRequires: pkgconfig(inference-engine-interface-common) +BuildRequires: pkgconfig(tizen-extension-client) +BuildRequires: pkgconfig(elementary) +BuildRequires: pkgconfig(ecore) +BuildRequires: pkgconfig(evas) +BuildRequires: pkgconfig(ecore-wl2) +BuildRequires: pkgconfig(ecore-evas) +BuildRequires: pkgconfig(appcore-efl) +BuildRequires: capi-ui-efl-util-devel + %description Media Vision library for Tizen Native API. Includes barcode detecting, barcode generating, face and image modules. @@ -279,6 +290,7 @@ install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj %TZ_SYS_BIN/mv_image* %TZ_SYS_BIN/mv_surveillance* %TZ_SYS_BIN/mv_infer* +%TZ_SYS_BIN/mv_stream* %if 0%{?gcov:1} %files gcov diff --git a/src/mv_common.c b/src/mv_common.c index c01536d0..80be8779 100644 --- a/src/mv_common.c +++ b/src/mv_common.c @@ -80,6 +80,29 @@ int mv_source_fill_by_buffer( return ret; } +int mv_source_fill_by_tensor_buffer( + mv_source_h source, + void *data_buffer, + mv_inference_data_type_e type, + unsigned int buffer_size, + unsigned int width, + unsigned int height, + unsigned int channel, + unsigned int dimension) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_fill_by_tensor_buffer_c( + source, data_buffer, type, buffer_size, width, height, + channel, dimension); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_clear( mv_source_h source) { @@ -110,6 +133,23 @@ int mv_source_get_buffer( return ret; } +int mv_source_get_tensor_buffer( + mv_source_h source, + void **data_buffer, + unsigned int *buffer_size) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(data_buffer); + MEDIA_VISION_NULL_ARG_CHECK(buffer_size); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_tensor_buffer_c(source, data_buffer, buffer_size); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_height( mv_source_h source, unsigned int *image_height) @@ -140,6 +180,36 @@ int mv_source_get_width( return ret; } +int mv_source_get_channel( + mv_source_h source, + unsigned int *channel) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(channel); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_channel_c(source, channel); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_source_get_dimension( + mv_source_h source, + unsigned int *dims) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_NULL_ARG_CHECK(dims); + + MEDIA_VISION_FUNCTION_ENTER(); + int ret = mv_source_get_dimension_c(source, dims); 
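+	/* Like every public wrapper in this file: feature-support gate, handle and
+	 * NULL-argument checks, ENTER/LEAVE tracing, around a delegation to the
+	 * _c() implementation. */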
+ MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_source_get_colorspace( mv_source_h source, mv_colorspace_e *image_colorspace) @@ -155,6 +225,19 @@ int mv_source_get_colorspace( return ret; } + +bool mv_source_is_tensor(mv_source_h source) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + + MEDIA_VISION_FUNCTION_ENTER(); + bool ret = mv_source_is_tensor_c(source); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + int mv_create_engine_config( mv_engine_config_h *engine_cfg) { diff --git a/src/mv_inference.c b/src/mv_inference.c index 46fe8cb3..efeb49cd 100644 --- a/src/mv_inference.c +++ b/src/mv_inference.c @@ -318,6 +318,8 @@ int mv_inference_pose_estimation_detect( mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + float scale_width, + float scale_height, mv_inference_pose_estimation_detected_cb detected_cb, void *user_data) { @@ -335,11 +337,41 @@ int mv_inference_pose_estimation_detect( ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data); */ #else + LOGE("%p", user_data); + ret = mv_inference_pose_estimation_detect_open(source, infer, roi, scale_width, scale_height, detected_cb, user_data); - ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data); + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +#endif +} + +int mv_inference_hand_detect( + mv_source_h source, + mv_inference_h infer, + mv_inference_hand_detected_cb detected_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + /* + ret = mv_inference_hand_detect_lic(source, infer, detected_cb, user_data); + */ +#else + + ret = mv_inference_hand_detect_open(source, infer, detected_cb, user_data); MEDIA_VISION_FUNCTION_LEAVE(); return ret; + #endif } \ No newline at end of file diff --git a/test/testsuites/CMakeLists.txt b/test/testsuites/CMakeLists.txt index 389e6118..29d6cfb3 100644 --- a/test/testsuites/CMakeLists.txt +++ b/test/testsuites/CMakeLists.txt @@ -11,3 +11,4 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/face) add_subdirectory(${PROJECT_SOURCE_DIR}/image) add_subdirectory(${PROJECT_SOURCE_DIR}/surveillance) add_subdirectory(${PROJECT_SOURCE_DIR}/inference) +add_subdirectory(${PROJECT_SOURCE_DIR}/stream_infer) diff --git a/test/testsuites/inference/inference_test_suite.c b/test/testsuites/inference/inference_test_suite.c index 648c085f..420ac981 100644 --- a/test/testsuites/inference/inference_test_suite.c +++ b/test/testsuites/inference/inference_test_suite.c @@ -31,6 +31,7 @@ #include #include + #define FILE_PATH_SIZE 1024 //Image Classification @@ -67,6 +68,11 @@ //Pose Estimation #define PE_TFLITE_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model.tflite" +#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite" +#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite" +//#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_tf113_int32.tflite" +//#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1_tf113.tflite" + /****** * Public model: * IC: mobilenet caffe, tf? 
@@ -82,6 +88,8 @@ #define NANO_PER_MILLI ((__clock_t) 1000000) #define MILLI_PER_SEC ((__clock_t) 1000) +static void * outputTensorData = NULL; + struct timespec diff(struct timespec start, struct timespec end) { struct timespec temp; @@ -166,21 +174,149 @@ void _pose_estimation_detected_cb ( } } + +int perform_tflite_hand_detection2(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine cofniguration.\n"); + } + } + return err; + } + + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"}; + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AIC_2_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(handle, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(handle, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 56); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 56); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 21); + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(handle, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + *engine_cfg = handle; + return err; +} + +void _hand_pose_cb ( + mv_source_h source, + const int number_of_pose_estimation, + const mv_point_s *locations, + void *user_data) +{ + printf("In callback, %d pose estimation\n", number_of_pose_estimation); + if (!user_data) { + for (int n = 0; n < number_of_pose_estimation; n++) { + printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y); + } + } else { + printf("%p\n", user_data); + } +} + void _hand_detected_cb ( mv_source_h source, const int number_of_hands, const float *confidences, const mv_rectangle_s *locations, - void *user_data) + void *user_data) //user_data can be mv_source? 
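/* When user_data is non-NULL it points at the 56x56x21 float heatmap filled in
 * by the first model; the callback below then wraps that buffer in a tensor
 * mv_source_h and chains the second (coordinate/gesture) model, reporting the
 * joints through _hand_pose_cb. With a NULL user_data it simply prints the
 * detected boxes. */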
{ printf("In callback: %d hands\n", number_of_hands); - for (int n = 0; n < number_of_hands; n++) { - printf("%.3f\n", confidences[n]); - printf("%d,%d,%d,%d\n", locations[n].point.x, - locations[n].point.y, - locations[n].width, - locations[n].height); + if (!user_data) { + for (int n = 0; n < number_of_hands; n++) { + printf("%.3f\n", confidences[n]); + printf("%d,%d,%d,%d\n", locations[n].point.x, + locations[n].point.y, + locations[n].width, + locations[n].height); + } + } else { + printf("%p\n", user_data); + + mv_source_h source2; + mv_create_source(&source2); + mv_source_fill_by_tensor_buffer(source2, user_data, MV_INFERENCE_DATA_FLOAT32, + 56 * 56 * 21* sizeof(float), + 56, 56, 21, 3); + + mv_engine_config_h engine_cfg2; + mv_create_engine_config(&engine_cfg2); + + perform_tflite_hand_detection2(&engine_cfg2); + + mv_inference_h infer2; + int err = mv_inference_create(&infer2); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create inference handle [err:%i]\n", err); + } + printf("infer2 created\n"); + + //configure + err = mv_inference_configure(infer2, engine_cfg2); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to configure inference handle [err:%i]\n", err); + } + printf("engine_cfg2 configured\n"); + + //prepare + err = mv_inference_prepare(infer2); + printf("infer2 prepared\n"); + + err = mv_inference_pose_estimation_detect(source2, infer2, NULL, 225.f, 225.f,_hand_pose_cb, (&source)); + printf("pose estimated\n"); + + mv_destroy_source(source2); + printf("destroy source2"); + + mv_inference_destroy(infer2); + mv_destroy_engine_config(engine_cfg2); } } @@ -2552,7 +2688,7 @@ int perform_pose_estimation_detection() clock_gettime(CLOCK_MONOTONIC, &s_tspec); // Pose estimation - err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _pose_estimation_detected_cb, NULL); + err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, 1.f, 1.f, _pose_estimation_detected_cb, NULL); clock_gettime(CLOCK_MONOTONIC, &e_tspec); @@ -2636,9 +2772,10 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg) char *inputNodeName = "input"; char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"}; + outputTensorData = (void*)calloc(56*56*21, sizeof(float)); mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - HD_TFLITE_WEIGHT_PATH); + PE_TFLITE_AIC_1_WEIGHT_PATH); mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, @@ -2685,84 +2822,14 @@ int perform_tflite_hand_detection(mv_engine_config_h *engine_cfg) return err; } -int perform_armnn_hand_detection(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine cofniguration.\n"); - } - } - return err; - } - - char *inputNodeName = "input"; - char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"}; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - HD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, - MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, - MV_INFERENCE_MODEL_STD_VALUE, 
- 1.0); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_TARGET_DEVICE_TYPE, - MV_INFERENCE_TARGET_DEVICE_CPU); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_WIDTH, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, - 3); - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute(handle, - MV_INFERENCE_OUTPUT_NODE_NAMES, - outputNodeNames, - 2); - - *engine_cfg = handle; - return err; -} - int perform_hand_detection() { int err = MEDIA_VISION_ERROR_NONE; int sel_opt = 0; - const int options[6] = {1, 2, 3, 4, 5, 6}; - const *names[6] = { "Configuration", + const int options[5] = {1, 2, 3, 4, 5}; + const *names[5] = { "Configuration", "TFLITE(CPU) + HandDetection", - "ARMNN(CPU) + HandDetection", "Prepare", "Run", "Back"}; @@ -2772,7 +2839,7 @@ int perform_hand_detection() mv_source_h mvSource = NULL; while(sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, 6); + sel_opt = show_menu("Select Action:", options, names, 5); switch (sel_opt) { case 1: { @@ -2798,17 +2865,6 @@ int perform_hand_detection() } break; case 3: - { - //perform pose estimation config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - err = perform_armnn_hand_detection(&engine_cfg); - } - break; - case 4: { // create - configure - prepare if (infer) { @@ -2841,7 +2897,7 @@ int perform_hand_detection() } } break; - case 5: + case 4: { if (mvSource) { int err2 = mv_destroy_source(mvSource); @@ -2878,7 +2934,11 @@ int perform_hand_detection() clock_gettime(CLOCK_MONOTONIC, &s_tspec); // Hand detection - err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL); + //err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, NULL); + + printf("mem: %p\n", outputTensorData); + //err = mv_inference_pose_estimation_detect(mvSource, infer, NULL, _hand_pose_cb, outputTensorData); + err = mv_inference_hand_detect(mvSource, infer, _hand_detected_cb, outputTensorData); clock_gettime(CLOCK_MONOTONIC, &e_tspec); @@ -2888,7 +2948,7 @@ int perform_hand_detection() break; } - case 6: + case 5: { //perform destroy if (engine_cfg) { @@ -2904,6 +2964,11 @@ int perform_hand_detection() printf("Fail to destroy inference handle [err:%i]\n", err); } } + + if (outputTensorData) { + free(outputTensorData); + outputTensorData = NULL; + } } break; default: @@ -2939,6 +3004,12 @@ int perform_hand_detection() sel_opt = (do_another == 1) ? 
0 : 1; } + if (outputTensorData) { + free(outputTensorData); + outputTensorData = NULL; + } + printf("outputTensorData: %p\n",outputTensorData); + return MEDIA_VISION_ERROR_NONE; } @@ -2957,7 +3028,7 @@ int main() int err = MEDIA_VISION_ERROR_NONE; while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, 6); + sel_opt = show_menu("Select Action:", options, names, 7); switch (sel_opt) { case 1: { diff --git a/test/testsuites/stream_infer/CMakeLists.txt b/test/testsuites/stream_infer/CMakeLists.txt new file mode 100644 index 00000000..dc0ab1c0 --- /dev/null +++ b/test/testsuites/stream_infer/CMakeLists.txt @@ -0,0 +1,63 @@ +project(mv_stream_infer) +cmake_minimum_required(VERSION 2.6) + +set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG _DEBUG) + +if(NOT SKIP_WARNINGS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +endif() + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${MV_CAPI_MEDIA_VISION_INC_DIR}) +include_directories(${INC_IMAGE_HELPER}) +include_directories(${INC_VIDEO_HELPER}) +include_directories(${INC_TS_COMMON}) + +file(GLOB MV_STREAMINFER_TEST_SUITE_INC_LIST "${PROJECT_SOURCE_DIR}/*.h") +file(GLOB MV_STREAMINFER_TEST_SUITE_SRC_LIST "${PROJECT_SOURCE_DIR}/*.c") + +find_package(PkgConfig REQUIRED) +pkg_check_modules(GLIB_PKG glib-2.0) + +if (NOT GLIB_PKG_FOUND) + message(SEND_ERROR "Failed to find glib") + return() +else() + include_directories(${GLIB_PKG_INCLUDE_DIRS}) +endif() + + +SET(dependents "gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0 cairo elementary ecore-wl2 appcore-efl capi-ui-efl-util") + +INCLUDE(FindPkgConfig) +pkg_check_modules(${PROJECT_NAME} REQUIRED ${dependents}) +FOREACH(flag ${${PROJECT_NAME}_CFLAGS}) + SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}") +ENDFOREACH(flag) + +SET(CMAKE_C_FLAGS "-I./include -I./include/headers ${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} -fPIC -Wall -DEFL_BETA_API_SUPPORT=1") +SET(CMAKE_C_FLAGS_DEBUG "-O0 -g") + +add_executable(${PROJECT_NAME} + ${MV_STREAMINFER_TEST_SUITE_INC_LIST} + ${MV_STREAMINFER_TEST_SUITE_SRC_LIST} + ${MV_CAPI_MEDIA_VISION_INC_LIST}) + +target_link_libraries(${PROJECT_NAME} ${MV_INFERENCE_LIB_NAME} + gstreamer-1.0 + glib-2.0 + capi-system-info + dlog + mv_image_helper + mv_video_helper + mv_testsuite_common + cairo + m + ${${PROJECT_NAME}_LIBRARIES} + ) + +install(TARGETS ${PROJECT_NAME} DESTINATION ${testbin_dir}) diff --git a/test/testsuites/stream_infer/stream_infer.c b/test/testsuites/stream_infer/stream_infer.c new file mode 100644 index 00000000..dbd8b689 --- /dev/null +++ b/test/testsuites/stream_infer/stream_infer.c @@ -0,0 +1,1772 @@ +/** + * Copyright (c) 2015 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#define _USE_MATH_DEFINES
+#if 0
+#include
+#endif
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define MAX(a, b) \
+({ __typeof__ (a) _a = (a); \
+__typeof__ (b) _b = (b); \
+_a > _b ? _a : _b; })
+
+#define MIN(a,b) \
+({ __typeof__ (a) _a = (a); \
+__typeof__ (b) _b = (b); \
+_a < _b ? _a : _b; })
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#define WIDTH (480)
+#define HEIGHT (270)
+
+#ifdef PACKAGE
+#undef PACKAGE
+#endif
+#define PACKAGE "test"
+
+static int st = 0;
+static Evas_Object *g_eo = NULL;
+static Evas_Object *icon = NULL;
+
+/* for video display */
+static Evas_Object *g_win_id;
+static Evas_Object *selected_win_id;
+
+enum {
+	MODEL_TYPE_POSE_CPM = 0,
+	MODEL_TYPE_POSE_HOURGLASS,
+	MODEL_TYPE_POSE_HAND_AIC,
+	MODEL_TYPE_POSE_HAND_AICLite
+};
+
+typedef struct {
+	gchar *filename;
+	gchar *filename2;
+	int numbuffers;
+	int modelType;
+	Evas_Object *win;
+	Evas_Object *layout_main; /* layout widget based on EDJ */
+	/* add more variables here */
+
+} appdata;
+
+static mv_rectangle_s poseRoi;
+
+static appdata ad;
+static GstBus *bus;
+static guint bus_watch_id;
+
+#define FILE_PATH_SIZE 1024
+
+// pose estimation
+#define PE_TFLITE_CPM_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model_cpm.tflite"
+#define PE_TFLITE_HOURGLASS_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/ped_tflite_model_hourglass.tflite"
+
+#define PE_TFLITE_AIC_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1.tflite"
+#define PE_TFLITE_AIC_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_b_1.tflite"
+#define PE_TFLITE_AICLite_1_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet1_lite_224.tflite"
+#define PE_TFLITE_AICLite_2_WEIGHT_PATH "/usr/share/capi-media-vision/models/PE/tflite/posenet2_lite_224.tflite"
+
+static float thValNeck = 0.3f; // 30%
+static float thValArm = 0.1f; // 10%
+static float thValLeg = 0.2f; // 20%
+
+typedef struct _rect {
+	int left;
+	int top;
+	int right;
+	int bottom;
+	int type;
+	bool updated;
+	bool cr_updated;
+} rect;
+
+
+typedef struct _humanSkeleton {
+	mv_point_s pose[21/*14*/];
+	mv_point_s prevPose[21/*14*/];
+	mv_rectangle_s loc;
+	mv_rectangle_s prevLoc;
+	mv_rectangle_s handRoi;
+	float scores[21/*14*/];
+	bool isPrevPose;
+	bool updated; // detection is found and pose is also good.
then update drawing + bool locUpdated; // track previous detection + bool IsDetected; // detection +} HumanSkeleton; + +typedef struct +{ + gboolean valid; + GstVideoInfo vinfo; +} CairoOverlayState; + +typedef struct +{ + GstBuffer *buffer; + gpointer user_data; +} HandOffData; + +//gchar *gesturenames; + +static HandOffData hdata_p; + +static GMutex pose_mutex; +static guint old_timeout = 0; +static guint nFrames = 0; + +// Gstreamer +GstElement *pipeline, *source, *filter, *toverlay, *sink, *sink2; +GstElement *tee, *vscale, *vsfilter, *vconv, *vcfilter; +GstElement *vrate, *vrfilter, *fsink, *vrsink; +GstElement *queue1, *queue2, *queue3; +GstElement *oconv, *coverlay; +GstElement *vcrop, *vcrscale, *vcrsfilter, *vcrsconv, *vcrscfilter, *vcrssink; +CairoOverlayState *overlay_state; + +GstElement *decodebin, *dscale, *dconv; +GstElement *dsfilter, *dcfilter; + +GstElement *tee2, *enc, *muxmp4, *fsink2, *queue4, *queue5, *encconv; + +//static HandSkeleton handSkeleton; +static HumanSkeleton humanSkeleton; +gulong handler_p; +GList *line_list = NULL; + +mv_source_h mv_src_p; +mv_source_h mv_src_p2; + +// Human pose +mv_engine_config_h hp_mv_engine_cfg; +mv_inference_h hp_mv_infer; + +mv_engine_config_h hp_mv_engine_cfg2; +mv_inference_h hp_mv_infer2; + +static void * outputTensorData; + +FILE *fp; + +static bool IsGestureMode; + +static int poseCropSize = 0; + +#define IMAGE_SIZE_WIDTH 640 +#define IMAGE_SIZE_HEIGHT 480 + +#define NANO_PER_SEC ((__clock_t) 1000000000) +#define NANO_PER_MILLI ((__clock_t) 1000000) +#define MILLI_PER_SEC ((__clock_t) 1000) + +struct timespec diff(struct timespec start, struct timespec end) +{ + struct timespec temp; + if ((end.tv_nsec - start.tv_nsec) < 0) { + temp.tv_sec = end.tv_sec - start.tv_sec - 1; + temp.tv_nsec = NANO_PER_SEC + end.tv_nsec - start.tv_nsec; + } + else { + temp.tv_sec = end.tv_sec - start.tv_sec; + temp.tv_nsec = end.tv_nsec - start.tv_nsec; + } + return temp; +} + +unsigned long gettotalmillisec(const struct timespec time) +{ + return time.tv_sec * MILLI_PER_SEC + time.tv_nsec / NANO_PER_MILLI; +} + + +void int_handler(int sig) +{ + char c; + + signal(sig, SIG_IGN); + while ((getchar()) != '\n'); + + printf(TEXT_YELLOW "Do you want to quit? 
[y/n]\n" TEXT_RESET);
+	c = getchar();
+	if (c == 'y' || c == 'Y') {
+
+		g_signal_handler_disconnect(vrsink, handler_p);
+#if 0
+		g_signal_handler_disconnect(vcrssink, handler_gp);
+#endif
+
+		gst_element_send_event(pipeline, gst_event_new_eos());
+
+		sleep(4);
+
+
+		if (mv_src_p)
+			mv_destroy_source(mv_src_p);
+
+		if (hp_mv_infer)
+			mv_inference_destroy(hp_mv_infer);
+
+		if (hp_mv_engine_cfg)
+			mv_destroy_engine_config(hp_mv_engine_cfg);
+
+		if (mv_src_p2)
+			mv_destroy_source(mv_src_p2);
+
+		if (hp_mv_infer2)
+			mv_inference_destroy(hp_mv_infer2);
+
+		if (hp_mv_engine_cfg2)
+			mv_destroy_engine_config(hp_mv_engine_cfg2);
+
+		if (outputTensorData) {
+			free(outputTensorData);
+			outputTensorData = NULL;
+		}
+		printf(TEXT_YELLOW "exit..\n" TEXT_RESET);
+		signal(SIGINT, SIG_DFL);
+		exit(0);
+	} else {
+		printf("no\n");
+		signal(SIGINT, int_handler);
+	}
+
+	getchar(); // Get new line character
+}
+
+void _hand_pose_cb (
+	mv_source_h source,
+	const int number_of_pose_estimation,
+	const mv_point_s *locations,
+	void *user_data)
+{
+	printf("%d landmarks\n", number_of_pose_estimation);
+	for (int n = 0; n < number_of_pose_estimation; ++n) {
+
+		humanSkeleton.pose[n].x = (int)((float)locations[n].x);
+		humanSkeleton.pose[n].y = (int)((float)locations[n].y);
+		humanSkeleton.scores[n] = 1.0f; /* confidences[n];*/
+
+		//printf("(%d,%d): %f\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y, confidences[n]);
+		//printf("(%d,%d)\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y);
+	}
+	humanSkeleton.IsDetected = true;
+
+}
+
+static void _hand_detected_cb (
+	mv_source_h source,
+	const int number_of_hands,
+	const float *confidences,
+	const mv_rectangle_s *locations,
+	void *user_data) //user_data can be mv_source?
+{
+
+#if 0
+	if (0 /*confidences[1] < thValNeck*/) {
+		printf("lost pose\n");
+		humanSkeleton.IsDetected = false;
+		humanSkeleton.isPrevPose = false;
+		return;
+	}
+
+	printf("%d landmarks, %d crop\n", number_of_landmarks, poseCropSize);
+	for (int n = 0; n < number_of_landmarks; ++n) {
+
+		humanSkeleton.pose[n].x = (int)((float)(locations[n].x + poseRoi.point.x) / (float)poseCropSize * 640.f);
+		humanSkeleton.pose[n].y = (int)((float)(locations[n].y + poseRoi.point.y) / (float)poseCropSize * 480.f);
+		humanSkeleton.scores[n] = 1.0f; /* confidences[n];*/
+
+		//printf("(%d,%d): %f\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y, confidences[n]);
+		printf("(%d,%d)\n", humanSkeleton.pose[n].x, humanSkeleton.pose[n].y);
+	}
+	humanSkeleton.IsDetected = true;
+#else
+
+	if (number_of_hands <= 0) {
+		humanSkeleton.IsDetected = false;
+		return;
+	}
+
+
+	struct timespec s_tspec;
+	struct timespec e_tspec;
+
+	clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+	// hand the filtered heatmaps (raw tensor) of the first model to the second one
+	mv_source_clear(mv_src_p2);
+	mv_source_fill_by_tensor_buffer(mv_src_p2, user_data,
+				MV_INFERENCE_DATA_FLOAT32,
+				56 * 56 * 21 * sizeof(float),
+				56, 56, 21, 3);
+
+	clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+	struct timespec diffspec = diff(s_tspec, e_tspec);
+	unsigned long timeDiff = gettotalmillisec(diffspec);
+	printf("tensor buffer fill time: %lu(ms)\n", timeDiff);
+
+	clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+	mv_inference_pose_estimation_detect(mv_src_p2, hp_mv_infer2, NULL, 640.f, 480.f, _hand_pose_cb, NULL);
+
+	clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+	diffspec = diff(s_tspec, e_tspec);
+	timeDiff = gettotalmillisec(diffspec);
+	printf("pose_estimation time: %lu(ms)\n", timeDiff);
+	//humanSkeleton.IsDetected = true;
+#endif
+	return;
+}
+
+
+static gboolean
+run_pose (void *user_data)
+{
+	HandOffData *udata = (HandOffData
*)user_data; + if (!GST_IS_BUFFER(udata->buffer)) + return FALSE; + + GstMapInfo map; + + /* + gst_buffer_map(udata->buffer, &map, GST_MAP_READ); + + mv_source_clear(mv_src_p); + + mv_source_fill_by_buffer(mv_src_p, map.data, 192*192*3, 192, 192, MEDIA_VISION_COLORSPACE_RGB888); + + gst_buffer_unmap(udata->buffer, &map); + + clock_t start = clock(); + mv_inference_pose_estimation_detect(mv_src_p, hp_mv_infer, &poseRoi, 1.f, 1.f, _human_pose_cb, NULL); + clock_t end = clock(); + */ + + gst_buffer_map(udata->buffer, &map, GST_MAP_READ); + + mv_source_clear(mv_src_p); + + mv_source_fill_by_buffer(mv_src_p, map.data, 224*224*3, 224, 224, MEDIA_VISION_COLORSPACE_RGB888); + + gst_buffer_unmap(udata->buffer, &map); + + + struct timespec s_tspec; + struct timespec e_tspec; + + void * outputTensorBuffer = (void*)udata->user_data; + + clock_gettime(CLOCK_MONOTONIC, &s_tspec); + + // invoke tflite -> _hand_detected_cb -> memcpy output -> invoke tflite -> _pose_cb + mv_inference_hand_detect(mv_src_p, hp_mv_infer, _hand_detected_cb, outputTensorBuffer); + + clock_gettime(CLOCK_MONOTONIC, &e_tspec); + struct timespec diffspec = diff(s_tspec, e_tspec); + unsigned long timeDiff = gettotalmillisec(diffspec); + printf("detect + pose time: %lu(ms)\n", timeDiff); + + return FALSE; + +} + +static void +_pose_est_handoff(GstElement *object, GstBuffer *buffer, GstPad *pad, gpointer user_data) +{ + + nFrames++; + hdata_p.buffer = buffer; + hdata_p.user_data = user_data; + +#if 0 + if (nFrames % 15 == 0) { + g_mutex_lock(&pose_mutex); + g_idle_add (run_pose, &hdata_p); + g_mutex_unlock(&pose_mutex); + } +#else + g_mutex_lock(&pose_mutex); + g_idle_add (run_pose, &hdata_p); + g_mutex_unlock(&pose_mutex); +#endif +} + +static void +prepare_overlay (GstElement * overlay, GstCaps * caps, gpointer user_data) +{ + CairoOverlayState *state = (CairoOverlayState *) user_data; + + state->valid = gst_video_info_from_caps (&state->vinfo, caps); +} + +/* +static void +draw_overlay (GstElement * overlay, cairo_t * cr, guint64 timestamp, + guint64 duration, gpointer user_data) +{ + CairoOverlayState *s = (CairoOverlayState *) user_data; + + if (!s->valid) { + printf("not ready draw_overlay"); + return; + } + + cairo_set_source_rgba(cr, 0.1, 0.9, 0.0, 0.7); + cairo_set_line_width(cr, 2.0); + + + if (!humanSkeleton.IsDetected) + return; + + + // + if (humanSkeleton.isPrevPose == false) { + humanSkeleton.prevPose[1] = humanSkeleton.pose[1]; + // head - neck + if (humanSkeleton.scores[0] >= thValNeck ) { + humanSkeleton.prevPose[0] = humanSkeleton.pose[0]; + } + + // right arm + if (humanSkeleton.scores[2] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[2] = humanSkeleton.pose[2]; + if (humanSkeleton.scores[3] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[3] = humanSkeleton.pose[3]; + if (humanSkeleton.scores[4] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[4] = humanSkeleton.pose[4]; + } + } + } + + // left arm + if (humanSkeleton.scores[5] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[5] = humanSkeleton.pose[5]; + if (humanSkeleton.scores[6] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[6] = humanSkeleton.pose[6]; + if (humanSkeleton.scores[7] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[7] = humanSkeleton.pose[7]; + } + } + } + + // right leg + if (humanSkeleton.scores[8] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[8] = humanSkeleton.pose[8]; + if 
(humanSkeleton.scores[9] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[9] = humanSkeleton.pose[9]; + if (humanSkeleton.scores[10] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[10] = humanSkeleton.pose[10]; + } + } + } + + // left leg + if (humanSkeleton.scores[11] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[11] = humanSkeleton.pose[11]; + if (humanSkeleton.scores[12] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[12] = humanSkeleton.pose[12]; + if (humanSkeleton.scores[13] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[13] = humanSkeleton.pose[13]; + } + } + } + humanSkeleton.isPrevPose = true; + } else { + // weighted sum of pose and prevPose + // method1: fixed weights (pose : prevPose = 0.7 : 0.3) + float poseWeight = 0.7f; + float prevPoseWeight = 0.3f; + humanSkeleton.prevPose[1].x = (poseWeight * humanSkeleton.pose[1].x + + prevPoseWeight * humanSkeleton.prevPose[1].x); + humanSkeleton.prevPose[1].y = (poseWeight * humanSkeleton.pose[1].y + + prevPoseWeight * humanSkeleton.prevPose[1].y); + // head - neck + if (humanSkeleton.scores[0] >= thValNeck ) { + humanSkeleton.prevPose[0].x = (poseWeight * humanSkeleton.pose[0].x + + prevPoseWeight * humanSkeleton.prevPose[0].x); + humanSkeleton.prevPose[0].y = (poseWeight * humanSkeleton.pose[0].y + + prevPoseWeight * humanSkeleton.prevPose[0].y); + } + + // right arm + if (humanSkeleton.scores[2] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[2].x = (poseWeight * humanSkeleton.pose[2].x + + prevPoseWeight * humanSkeleton.prevPose[2].x); + humanSkeleton.prevPose[2].y = (poseWeight * humanSkeleton.pose[2].y + + prevPoseWeight * humanSkeleton.prevPose[2].y); + if (humanSkeleton.scores[3] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[3].x = (poseWeight * humanSkeleton.pose[3].x + + prevPoseWeight * humanSkeleton.prevPose[3].x); + humanSkeleton.prevPose[3].y = (poseWeight * humanSkeleton.pose[3].y + + prevPoseWeight * humanSkeleton.prevPose[3].y); + if (humanSkeleton.scores[4] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[4].x = (poseWeight * humanSkeleton.pose[4].x + + prevPoseWeight * humanSkeleton.prevPose[4].x); + humanSkeleton.prevPose[4].y = (poseWeight * humanSkeleton.pose[4].y + + prevPoseWeight * humanSkeleton.prevPose[4].y); + } + } + } + + // left arm + if (humanSkeleton.scores[5] >= thValArm) { + // neck - right shoulder + humanSkeleton.prevPose[5].x = (poseWeight * humanSkeleton.pose[5].x + + prevPoseWeight * humanSkeleton.prevPose[5].x); + humanSkeleton.prevPose[5].y = (poseWeight * humanSkeleton.pose[5].y + + prevPoseWeight * humanSkeleton.prevPose[5].y); + if (humanSkeleton.scores[6] >= thValArm) { + // right shoulder - right elbow + humanSkeleton.prevPose[6].x = (poseWeight * humanSkeleton.pose[6].x + + prevPoseWeight * humanSkeleton.prevPose[6].x); + humanSkeleton.prevPose[6].y = (poseWeight * humanSkeleton.pose[6].y + + prevPoseWeight * humanSkeleton.prevPose[6].y); + if (humanSkeleton.scores[7] >= thValArm) { + // right elbow - right wrist + humanSkeleton.prevPose[7].x = (poseWeight * humanSkeleton.pose[7].x + + prevPoseWeight * humanSkeleton.prevPose[7].x); + humanSkeleton.prevPose[7].y = (poseWeight * humanSkeleton.pose[7].y + + prevPoseWeight * humanSkeleton.prevPose[7].y); + } + } + } + + // right leg + if (humanSkeleton.scores[8] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[8].x = (poseWeight * 
humanSkeleton.pose[8].x + + prevPoseWeight * humanSkeleton.prevPose[8].x); + humanSkeleton.prevPose[8].y = (poseWeight * humanSkeleton.pose[8].y + + prevPoseWeight * humanSkeleton.prevPose[8].y); + if (humanSkeleton.scores[9] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[9].x = (poseWeight * humanSkeleton.pose[9].x + + prevPoseWeight * humanSkeleton.prevPose[9].x); + humanSkeleton.prevPose[9].y = (poseWeight * humanSkeleton.pose[9].y + + prevPoseWeight * humanSkeleton.prevPose[9].y); + if (humanSkeleton.scores[10] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[10].x = (poseWeight * humanSkeleton.pose[10].x + + prevPoseWeight * humanSkeleton.prevPose[10].x); + humanSkeleton.prevPose[10].y = (poseWeight * humanSkeleton.pose[10].y + + prevPoseWeight * humanSkeleton.prevPose[10].y); + } + } + } + + // left leg + if (humanSkeleton.scores[11] >= thValLeg) { + // neck - right shoulder + humanSkeleton.prevPose[11].x = (poseWeight * humanSkeleton.pose[11].x + + prevPoseWeight * humanSkeleton.prevPose[11].x); + humanSkeleton.prevPose[11].y = (poseWeight * humanSkeleton.pose[11].y + + prevPoseWeight * humanSkeleton.prevPose[11].y); + if (humanSkeleton.scores[12] >= thValLeg) { + // right shoulder - right elbow + humanSkeleton.prevPose[12].x = (poseWeight * humanSkeleton.pose[12].x + + prevPoseWeight * humanSkeleton.prevPose[12].x); + humanSkeleton.prevPose[12].y = (poseWeight * humanSkeleton.pose[12].y + + prevPoseWeight * humanSkeleton.prevPose[12].y); + if (humanSkeleton.scores[13] >= thValLeg) { + // right elbow - right wrist + humanSkeleton.prevPose[13].x = (poseWeight * humanSkeleton.pose[13].x + + prevPoseWeight * humanSkeleton.prevPose[13].x); + humanSkeleton.prevPose[13].y = (poseWeight * humanSkeleton.pose[13].y + + prevPoseWeight * humanSkeleton.prevPose[13].y); + } + } + } + } + + // + //draw.. 
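+	// Drawing below proceeds limb by limb: each chain (e.g. neck -> shoulder ->
+	// elbow -> wrist) keeps extending only while every joint on it has passed
+	// its per-limb threshold, so one low-confidence joint truncates the limb.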
+ // head - neck + if (humanSkeleton.scores[0] >= thValNeck ) { + cairo_move_to(cr, humanSkeleton.prevPose[0].x, humanSkeleton.prevPose[0].y); + cairo_line_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + } + + // right arm + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[2] >= thValArm) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[2].x, humanSkeleton.prevPose[2].y); + if (humanSkeleton.scores[3] >= thValArm) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[3].x, humanSkeleton.prevPose[3].y); + if (humanSkeleton.scores[4] >= thValArm) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[4].x, humanSkeleton.prevPose[4].y); + } + } + } + cairo_stroke(cr); + + // left arm + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[5] >= thValArm) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[5].x, humanSkeleton.prevPose[5].y); + if (humanSkeleton.scores[6] >= thValArm) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[6].x, humanSkeleton.prevPose[6].y); + if (humanSkeleton.scores[7] >= thValArm) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[7].x, humanSkeleton.prevPose[7].y); + } + } + } + cairo_stroke(cr); + + + // right leg + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[8] >= thValLeg) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[8].x, humanSkeleton.prevPose[8].y); + if (humanSkeleton.scores[9] >= thValLeg) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[9].x, humanSkeleton.prevPose[9].y); + if (humanSkeleton.scores[10] >= thValLeg) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[10].x, humanSkeleton.prevPose[10].y); + } + } + } + cairo_stroke(cr); + + // left leg + cairo_move_to(cr, humanSkeleton.prevPose[1].x, humanSkeleton.prevPose[1].y); + if (humanSkeleton.scores[11] >= thValLeg) { + // neck - right shoulder + cairo_line_to(cr, humanSkeleton.prevPose[11].x, humanSkeleton.prevPose[11].y); + if (humanSkeleton.scores[12] >= thValLeg) { + // right shoulder - right elbow + cairo_line_to(cr, humanSkeleton.prevPose[12].x, humanSkeleton.prevPose[12].y); + if (humanSkeleton.scores[13] >= thValLeg) { + // right elbow - right wrist + cairo_line_to(cr, humanSkeleton.prevPose[13].x, humanSkeleton.prevPose[13].y); + } + } + } + cairo_stroke(cr); +} +*/ +static void +draw_overlay_hand (GstElement * overlay, cairo_t * cr, guint64 timestamp, + guint64 duration, gpointer user_data) +{ + CairoOverlayState *s = (CairoOverlayState *) user_data; + + if (!s->valid) { + printf("not ready draw_overlay"); + return; + } + + cairo_set_source_rgba(cr, 0.1, 0.9, 0.0, 0.7); + cairo_set_line_width(cr, 2.0); + + + if (!humanSkeleton.IsDetected) + return; + + + // thumb - red + cairo_set_source_rgba (cr, 0.9, 0.1, 0.0, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 1 ; k < 5; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // fore - red + cairo_set_source_rgba (cr, 0.9, 0.1, 0.0, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 5 ; k < 9; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // middle 
- grean + cairo_set_source_rgba (cr, 0.1, 0.9, 0.0, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 9 ; k < 13; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // ring - blue + cairo_set_source_rgba (cr, 0.1, 0.0, 0.9, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 13 ; k < 17; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); + + // little - purple + cairo_set_source_rgba (cr, 0.5, 0.0, 0.5, 0.7); + cairo_move_to(cr, humanSkeleton.pose[0].x, humanSkeleton.pose[0].y); + for (int k = 17 ; k < 21; ++k) { + cairo_line_to(cr, humanSkeleton.pose[k].x, humanSkeleton.pose[k].y); + } + cairo_stroke(cr); +} + +static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data) +{ + + switch (GST_MESSAGE_TYPE (msg)) { + + case GST_MESSAGE_EOS: + printf ("End of stream\n"); + break; + + case GST_MESSAGE_ERROR: { + gchar *debug; + GError *error; + + gst_message_parse_error (msg, &error, &debug); + g_free (debug); + + printf ("Error: %s\n", error->message); + g_error_free (error); + + break; + } + default: + break; + } + + return TRUE; +} + +int perform_armnn_human_pose_cpm_configure(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "image"; + char *outputNodeName[1] = {"Convolutional_Pose_Machine/stage_5_out"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_CPM_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_ARMNN); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_GPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, + 1); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_armnn_human_pose_hourglass_configure(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "image"; + char *outputNodeName[1] = {"hourglass_out_3"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_HOURGLASS_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + 
mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_ARMNN); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_GPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 192); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, + 1); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AIC(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/boundingbox2", "mobilenetv2/heatmap"}; + + //outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AIC_1_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AIC2(mv_engine_config_h mv_engine_cfg) +{ + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AIC_2_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 
21); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AICLite(mv_engine_config_h mv_engine_cfg) +{ + if (mv_engine_cfg == NULL) { + printf("mv_engine_cfg is null\n"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/boundingbox", "mobilenetv2/heatmap"}; + + //outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AICLite_1_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 224); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 3); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_hand_detection_AICLite2(mv_engine_config_h mv_engine_cfg) +{ + char *inputNodeName = "input"; + char *outputNodeNames[2] = {"mobilenetv2/coord_refine", "mobilenetv2/gesture"}; + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + PE_TFLITE_AICLite_2_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(mv_engine_cfg, + MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_WIDTH, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 56); + + mv_engine_config_set_int_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, + 21); + + mv_engine_config_set_string_attribute(mv_engine_cfg, + MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute(mv_engine_cfg, + MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeNames, + 2); + + return MEDIA_VISION_ERROR_NONE; +} + +static void __global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct tizen_surface **tz_surface = NULL; + + if (!data) { + printf("NULL data\n"); + 
return; + } + + tz_surface = (struct tizen_surface **)data; + + if (!interface) { + printf("NULL interface\n"); + return; + } + + if (strcmp(interface, "tizen_surface") == 0) { + printf("binding tizen surface for wayland\n"); + + *tz_surface = wl_registry_bind(registry, name, &tizen_surface_interface, 1); + if (*tz_surface == NULL) + printf("failed to bind\n"); + + printf("done\n"); + } + + return; +} + +static void __global_remove(void *data, struct wl_registry *wl_registry, uint32_t name) +{ + printf("enter\n"); + return; +} + +static const struct wl_registry_listener _wl_registry_listener = { + __global, + __global_remove +}; + +void __parent_id_getter(void *data, struct tizen_resource *tizen_resource, uint32_t id) +{ + if (!data) { + printf("NULL data\n"); + return; + } + + *((unsigned int *)data) = id; + + printf("[CLIENT] got parent_id [%u] from server\n", id); + + return; +} + +static const struct tizen_resource_listener _tz_resource_listener = { + __parent_id_getter +}; + +static void set_overlay(Ecore_Evas *ee) +{ + Ecore_Wl2_Window *window = NULL; + Ecore_Wl2_Display *e_wl2_display = NULL; + + struct wl_display *display = NULL; + struct wl_display *display_wrapper = NULL; + struct wl_surface *surface = NULL; + struct wl_registry *registry = NULL; + struct wl_event_queue *queue = NULL; + struct tizen_surface *tz_surface = NULL; + struct tizen_resource *tz_resource = NULL; + + window = ecore_evas_wayland2_window_get(ee); + if (!window) { + printf("failed to get wayland window\n"); + goto _DONE; + } + + /* set video_has flag to a video application window */ + ecore_wl2_window_video_has(window, EINA_TRUE); + + surface = (struct wl_surface *)ecore_wl2_window_surface_get(window); + if (!surface) { + printf("failed to get wayland surface\n"); + goto _DONE; + } + + e_wl2_display = ecore_wl2_connected_display_get(NULL); + if (!e_wl2_display) { + printf("failed to get ecore wl2 display\n"); + goto _DONE; + } + + display = (struct wl_display *)ecore_wl2_display_get(e_wl2_display); + if (!display) { + printf("failed to get wayland display\n"); + goto _DONE; + } + + display_wrapper = wl_proxy_create_wrapper(display); + if (!display_wrapper) { + printf("failed to create wl display wrapper\n"); + } + + queue = wl_display_create_queue(display); + if (!queue) { + printf("failed to create wl display queue\n"); + goto _DONE; + } + + wl_proxy_set_queue((struct wl_proxy *)display_wrapper, queue); + + registry = wl_display_get_registry(display_wrapper); + if (!registry) { + printf("failed to get wayland registry\n"); + goto _DONE; + } + + wl_registry_add_listener(registry, &_wl_registry_listener, &tz_surface); + + wl_display_dispatch_queue(display, queue); + wl_display_roundtrip_queue(display, queue); + + if (!tz_surface) { + printf("failed to get tizen surface\n"); + goto _DONE; + } + + /* Get parent_id which is unique in a entire systemw. 
*/ + tz_resource = tizen_surface_get_tizen_resource(tz_surface, surface); + if (!tz_resource) { + printf("failed to get tizen resurce\n"); + goto _DONE; + } + + int parent_id = 0; + + tizen_resource_add_listener(tz_resource, &_tz_resource_listener, &parent_id); + + wl_display_roundtrip_queue(display, queue); + + if (parent_id > 0) { + printf("parent id : %u\n", parent_id); + + gst_video_overlay_set_wl_window_wl_surface_id(GST_VIDEO_OVERLAY(sink2), parent_id); + gst_element_set_state (pipeline, GST_STATE_PLAYING); + } else { + printf("failed to get parent id\n"); + } + +_DONE: + if (tz_surface) { + tizen_surface_destroy(tz_surface); + tz_surface = NULL; + } + + if (tz_resource) { + tizen_resource_destroy(tz_resource); + tz_resource = NULL; + } + + if (registry) { + wl_registry_destroy(registry); + registry = NULL; + } + + if (queue) { + wl_event_queue_destroy(queue); + queue = NULL; + } + + if (display_wrapper) { + wl_proxy_wrapper_destroy(display_wrapper); + display_wrapper = NULL; + } +} + +static void win_resize_cb (void *data, Evas *e, Evas_Object *obj, void *event_info) +{ + int w, h; + int wid; + + printf("_canvas_resize_cb\n"); + + Evas * _e = evas_object_evas_get(obj); + Ecore_Evas *ee = ecore_evas_ecore_evas_get(_e); + ecore_evas_geometry_get(ee, NULL, NULL, &w, &h); + + if (!st) { + set_overlay(ee); + int x, y; + elm_win_screen_position_get(obj, &x, &y); + printf("x = %d, y = %d\n", x, y); + } else + st++; +} + +static void cb_new_pad (GstElement *element, GstPad *pad, gpointer data) +{ + gchar *name; + GstElement *other = data; + + name = gst_pad_get_name (pad); + printf ("A new pad %s was created for %s\n", name, gst_element_get_name(element)); + g_free (name); + + printf ("element %s will be linked to %s\n", + gst_element_get_name(element), + gst_element_get_name(dscale)); + gst_element_link(element, dscale); +} + +static int app_create(void *data) +{ + appdata *ad = data; + Evas_Object *win = NULL; + + // GST + g_mutex_init(&pose_mutex); + + signal(SIGINT, int_handler); + + /* initialization */ + gst_init(NULL, NULL); + + /* mediavision configuration*/ + IsGestureMode = false; + if (hp_mv_engine_cfg == NULL) { + mv_create_engine_config(&hp_mv_engine_cfg); + } + + if (hp_mv_engine_cfg2 == NULL) { + mv_create_engine_config(&hp_mv_engine_cfg2); + } + + int err = MEDIA_VISION_ERROR_NONE; + + + if (ad->modelType == MODEL_TYPE_POSE_CPM) { + err = perform_armnn_human_pose_cpm_configure(hp_mv_engine_cfg); + } else if (ad->modelType == MODEL_TYPE_POSE_HOURGLASS) { + err = perform_armnn_human_pose_hourglass_configure(hp_mv_engine_cfg); + } else if (ad->modelType == MODEL_TYPE_POSE_HAND_AIC) { + outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + err = perform_tflite_hand_detection_AIC(hp_mv_engine_cfg); + + err = perform_tflite_hand_detection_AIC2(hp_mv_engine_cfg2); + } else { + outputTensorData = (void*)calloc(56*56*21, sizeof(float)); + err = perform_tflite_hand_detection_AICLite(hp_mv_engine_cfg); + + err = perform_tflite_hand_detection_AICLite2(hp_mv_engine_cfg2); + } + + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Error on perform_armnn_human_pose_configure"); + } + + printf("configuration done\n"); + + printf("loading pose model: "); + mv_inference_create(&hp_mv_infer); + + mv_inference_configure(hp_mv_infer, hp_mv_engine_cfg); + + clock_t start = clock(); + mv_inference_prepare(hp_mv_infer); + clock_t end = clock(); + printf("time: %2.3f\n", (double)(end - start)/CLOCKS_PER_SEC); + + + mv_inference_create(&hp_mv_infer2); + + mv_inference_configure(hp_mv_infer2, 
hp_mv_engine_cfg2); + + start = clock(); + mv_inference_prepare(hp_mv_infer2); + end = clock(); + printf("time: %2.3f\n", (double)(end - start)/CLOCKS_PER_SEC); + + /* mediavision source */ + mv_create_source(&mv_src_p); + mv_create_source(&mv_src_p2); + + pipeline = gst_pipeline_new("app"); + + overlay_state = g_new0 (CairoOverlayState, 1); + + /* create gstreamer elements */ + if (!ad->filename) { + source = gst_element_factory_make("v4l2src", "src"); + filter = gst_element_factory_make("capsfilter", "filter"); + } else { + source = gst_element_factory_make("filesrc", "src"); + + decodebin = gst_element_factory_make("decodebin", "dbin"); + dscale = gst_element_factory_make("videoscale", "dscale"); + dconv = gst_element_factory_make("videoconvert", "dconv"); + dsfilter = gst_element_factory_make("capsfilter", "dsfilter"); + dcfilter = gst_element_factory_make("capsfilter", "dcfilter"); + } + + if (ad->filename2) { + tee2 = gst_element_factory_make("tee", "tee2"); + enc = gst_element_factory_make("avenc_mpeg4", "enc"); + muxmp4 = gst_element_factory_make("mp4mux", "muxmp4"); + fsink2 = gst_element_factory_make("filesink", "fsink2"); + queue4 = gst_element_factory_make("queue", "queue4"); + queue5 = gst_element_factory_make("queue", "queue5"); + encconv = gst_element_factory_make("videoconvert", "encconv"); + } + + + tee = gst_element_factory_make("tee", "tee"); + queue1 = gst_element_factory_make("queue", "queue1"); + queue2 = gst_element_factory_make("queue", "queue2"); + + if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) { + queue3 = gst_element_factory_make("queue", "queue3"); + } + + // queue1 - videoscale - capsfilter -viedoeconvert - capsfilter - videorate - capsfilter -fakesink + vscale = gst_element_factory_make("videoscale", "scale"); + vsfilter = gst_element_factory_make("capsfilter", "vsfilter"); + vconv = gst_element_factory_make("videoconvert", "convert"); + vcfilter = gst_element_factory_make("capsfilter", "vcfilter"); + vrate = gst_element_factory_make("videorate", "rate"); + vrfilter = gst_element_factory_make("capsfilter", "vrfilter"); + vrsink = gst_element_factory_make("fakesink", "vrsink"); + + // queue2 - videoconvert - cairooveray - tizenwlsink + oconv = gst_element_factory_make("videoconvert", "oconv"); + coverlay = gst_element_factory_make("cairooverlay", "coverlay"); + sink = gst_element_factory_make("fpsdisplaysink", "vsink"); + sink2 = gst_element_factory_make("tizenwlsink", "vsink2"); + + // after detection, crop using video crop + // queue3 - videocrop - videoscale -capsfilter - videoconvert - capsfilter -fakesink + + if (0/*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) { + vcrop = gst_element_factory_make("videocrop", "crop"); + vcrscale = gst_element_factory_make("videoscale", "crscale"); + vcrsfilter = gst_element_factory_make("capsfilter", "vcrsfilter"); + vcrsconv = gst_element_factory_make("videoconvert", "vcrsconvert"); + vcrscfilter = gst_element_factory_make("capsfilter", "vcrscfilter"); + vcrssink = gst_element_factory_make("fakesink", "vcrssink"); + } + + if (!pipeline || !source || + !tee || !queue1 || !vscale || !vsfilter || !vconv || !vcfilter || + !vrate || !vrfilter || !vrsink || + !queue2 || !oconv || !coverlay || !sink || !sink2) { + printf(TEXT_RED "One element(queue1 or queue2) might be not created. 
Exiting.\n" TEXT_RESET); + return -1; + } + + if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC && (!pipeline || !queue3 + || !vcrop || !vcrscale || !vcrsfilter || !vcrsconv || !vcrscfilter || !vcrssink*/) { + printf(TEXT_RED "One element(queue3) might be not created. Exiting.\n" TEXT_RESET); + return -1; + } + + if (!ad->filename) { + if (!filter) { + printf(TEXT_RED "One element might be not created. Existing.\n" TEXT_RESET); + return -1; + } + g_object_set(G_OBJECT(filter), "caps", gst_caps_from_string("video/x-raw, format=YUY2, width=640, height=480"), NULL); + } else { + if (!decodebin || !dscale || !dconv) { + printf(TEXT_RED "One element might be not created. Exiting.\n" TEXT_RESET); + return -1; + } + g_object_set(G_OBJECT(dsfilter), "caps", gst_caps_from_string("video/x-raw, width=640, height=480"), NULL); + g_object_set(G_OBJECT(dcfilter), "caps", gst_caps_from_string("video/x-raw, format=YUY2, width=640, height=480"), NULL); + g_signal_connect (decodebin, "pad-added", G_CALLBACK (cb_new_pad), NULL); + } + + if (ad->filename2) { + if (!tee2 || !enc || !muxmp4 || !queue4 || !queue5 || !fsink2 || !encconv) { + printf(TEXT_RED "One element might be not created. Exiting.\n" TEXT_RESET); + printf("%p, %p, %p, %p, %p, %p, %p\n", tee2, enc, muxmp4, queue4, queue5, fsink2, encconv); + return -1; + } + g_object_set(G_OBJECT(fsink2), "location", ad->filename2, NULL); + + } + + /* set up the pipeline */ + //g_signal_connect (coverlay, "draw", G_CALLBACK (draw_overlay), overlay_state); + g_signal_connect (coverlay, "draw", G_CALLBACK (draw_overlay_hand), overlay_state); + + g_signal_connect (coverlay, "caps-changed", G_CALLBACK (prepare_overlay), overlay_state); + + if (!ad->filename) { + g_object_set(G_OBJECT(source), "device", "/dev/video8", NULL); // 252 + } else { + g_object_set(G_OBJECT(source), "location", ad->filename, NULL); + g_object_set(G_OBJECT(source), "num-buffers", ad->numbuffers, NULL); + } + + g_object_set(G_OBJECT(sink2), "use-tbm", FALSE, NULL); + g_object_set(G_OBJECT(sink2), "sync", FALSE, NULL); + g_object_set(G_OBJECT(sink), "video-sink", sink2, NULL); + g_object_set(G_OBJECT(sink), "sync", FALSE, NULL); + + if (ad->modelType == MODEL_TYPE_POSE_HAND_AIC || + ad->modelType == MODEL_TYPE_POSE_HAND_AICLite) { + g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=224, height=224"), NULL); + poseCropSize = 224; + } else { + g_object_set(G_OBJECT(vsfilter), "caps", gst_caps_from_string("video/x-raw, width=192, height=192"), NULL); + poseCropSize = 192; + } + + g_object_set(G_OBJECT(vcfilter), "caps", gst_caps_from_string("video/x-raw, format=RGB"), NULL); + g_object_set(G_OBJECT(vrfilter), "caps", gst_caps_from_string("video/x-raw, framerate=15/1"), NULL); + + //g_object_set(G_OBJECT(vrate), "drop-only", TRUE, NULL); + + //g_object_set(G_OBJECT(queue2), "leaky", 2, NULL); +#if 0 + g_object_set(G_OBJECT(queue3), "max-size-buffers", 0, NULL); + g_object_set(G_OBJECT(queue3), "max-size-time", 0, NULL); + g_object_set(G_OBJECT(queue3), "max-size-bytes", 0, NULL); +#endif + //g_object_set(G_OBJECT(queue3), "leaky", 2, NULL); + + // here to be continue + printf("vrsink signal-handoffs\n"); + g_object_set(G_OBJECT(vrsink), "signal-handoffs", TRUE, NULL); + handler_p = g_signal_connect (vrsink, "handoff", G_CALLBACK(_pose_est_handoff), outputTensorData); + g_object_set(G_OBJECT(vrsink), "sync", FALSE, NULL); + + + g_object_set(G_OBJECT(vcrssink), "sync", FALSE, NULL); + + + /* add a message handler */ + bus = gst_pipeline_get_bus 
(GST_PIPELINE(pipeline));
+	bus_watch_id = gst_bus_add_watch(bus, bus_call, NULL);
+	gst_object_unref(bus);
+
+	/* add elements into the pipeline */
+	gst_bin_add_many(GST_BIN(pipeline),
+			source,
+			tee, queue1, vscale, vsfilter, vconv, vcfilter,
+			vrate, vrfilter, vrsink,
+			queue2, oconv, coverlay, sink,
+			NULL);
+
+	if (0 /*ad->modelType == MODEL_TYPE_POSE_HAND_AIC*/) {
+		gst_bin_add_many(GST_BIN(pipeline),
+			queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink,
+			NULL);
+		gst_element_link_many (tee, queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL);
+	}
+
+	/* link elements */
+	if (!ad->filename) {
+		gst_bin_add(GST_BIN(pipeline), filter);
+		gst_element_link_many(source, filter, tee, NULL);
+	} else {
+		gst_bin_add_many(GST_BIN(pipeline), decodebin, dscale, dconv, dsfilter, dcfilter, NULL);
+		gst_element_link_many(source, decodebin, NULL);
+		gst_element_link_many(dscale, dsfilter, dconv, dcfilter, tee, NULL);
+	}
+
+
+	if (!ad->filename2) {
+		// display
+		gst_element_link_many (tee, queue2, oconv, coverlay, /*toverlay,*/ sink, NULL);
+		// pose
+		gst_element_link_many (tee, queue1, vrate, vrfilter, vconv, vcfilter, vscale, vsfilter, vrsink, NULL);
+	} else {
+
+		/* gst_bin_add_many() is a varargs call and must be NULL-terminated */
+		gst_bin_add_many(GST_BIN(pipeline), tee2, enc, muxmp4, fsink2, queue4, queue5, encconv, NULL);
+
+		// display
+		gst_element_link_many (tee, queue2, oconv, coverlay, tee2, NULL);
+		gst_element_link_many (tee2, queue4, sink, NULL);
+		gst_element_link_many (tee2, queue5, encconv, enc, muxmp4, fsink2, NULL);
+
+		// pose
+		gst_element_link_many (tee, queue1, vrate, vrfilter, vconv, vcfilter, vscale, vsfilter, vrsink, NULL);
+	}
+
+	/* set the pipeline state to "playing" state */
+	//gst_element_set_state(pipeline, GST_STATE_PLAYING);
+
+	/* loop */
+	humanSkeleton.IsDetected = false;
+	humanSkeleton.isPrevPose = false;
+	printf(TEXT_GREEN "Running.....\n" TEXT_RESET);
+	// GST_END
+#if 0
+	/* use gl backend */
+	elm_config_accel_preference_set("opengl");
+
+	/* create window */
+	//win = elm_win_add(NULL, PACKAGE, ELM_WIN_SPLASH );
+	win = elm_win_add(NULL, PACKAGE, ELM_WIN_BASIC);
+	if (win) {
+		elm_win_title_set(win, PACKAGE);
+		elm_win_borderless_set(win, EINA_TRUE);
+		elm_win_autodel_set(win, EINA_TRUE);
+		elm_win_alpha_set(win, EINA_FALSE);
+		evas_object_show(win);
+	}
+	elm_win_layer_set(win, 9);
+	elm_win_prop_focus_skip_set(win, EINA_TRUE);
+
+	ad->win = win;
+	g_win_id = win;
+	selected_win_id = g_win_id;
+
+	Evas_Object *bg = elm_bg_add(win);
+	elm_win_resize_object_add(win, bg);
+	evas_object_size_hint_min_set(bg, WIDTH, HEIGHT);
+	evas_object_size_hint_max_set(bg, WIDTH, HEIGHT);
+	evas_object_show(bg);
+
+	elm_win_activate(win);
+
+
+	evas_object_event_callback_add(win, EVAS_CALLBACK_RESIZE, win_resize_cb, NULL);
+#else
+	gst_element_set_state (pipeline, GST_STATE_PLAYING);
+#endif
+	return 0;
+}
+
+
+static int app_terminate(void *data)
+{
+	appdata *ad = data;
+
+	/* out of loop */
+	printf(TEXT_GREEN "Stopping.....\n" TEXT_RESET);
+	gst_element_set_state(pipeline, GST_STATE_NULL);
+
+	printf(TEXT_GREEN "pipe unref.....\n" TEXT_RESET);
+	gst_object_unref(GST_OBJECT(pipeline));
+
+	printf(TEXT_GREEN "remove bus_watch id.....\n" TEXT_RESET);
+	g_source_remove(bus_watch_id);
+
+	if (overlay_state != NULL) {
+		printf(TEXT_GREEN "g_free overlay.....\n" TEXT_RESET);
+		g_free(overlay_state);
+	}
+
+	g_mutex_clear(&pose_mutex);
+	printf(TEXT_GREEN "done.....\n" TEXT_RESET);
+
+	if (g_win_id) {
+		evas_object_del(g_win_id);
+		g_win_id = NULL;
+	}
+	ad->win = NULL;
selected_win_id = NULL;
+
+	return 0;
+}
+
+struct appcore_ops ops = {
+	.create = app_create,
+	.terminate = app_terminate,
+};
+
+int main (int argc, char *argv[])
+{
+	memset(&ad, 0x0, sizeof(appdata));
+	ops.data = &ad;
+
+	if (argc >= 6) {
+		ad.filename = g_strdup(argv[5]);
+		printf("launch with file source (%s)\n", ad.filename);
+		if (argc > 6) {
+			ad.filename2 = g_strdup(argv[6]);
+			ad.numbuffers = -1;
+			printf("recording output to (%s)\n", ad.filename2);
+		}
+		if (argc > 7) {
+			ad.numbuffers = atoi(argv[7]);
+		}
+	} else {
+		printf("launch with camera source\n");
+	}
+
+	if (argc < 2) {
+		printf("usage: mv_stream_infer model [NeckThresVal, ArmThresVal, LegThresVal, [filename]]\n");
+		printf("model: 0(CPM), 1(HOURGLASS), 2(AIC Hand), 3(AIC Lite Hand)\n");
+		return -1;
+	}
+
+	ad.modelType = atoi(argv[1]);
+	if (ad.modelType < 0 || ad.modelType > 3) {
+		printf("unsupported model type\n");
+		return -1;
+	}
+
+	if (ad.modelType != MODEL_TYPE_POSE_HAND_AIC &&
+		ad.modelType != MODEL_TYPE_POSE_HAND_AICLite) {
+		thValNeck = (float)atoi(argv[2])/100.f;
+		thValArm = (float)atoi(argv[3])/100.f;
+		thValLeg = (float)atoi(argv[4])/100.f;
+
+		poseRoi.point.x = 50;
+		poseRoi.point.y = 0;
+		poseRoi.width = 100;
+		poseRoi.height = 192;
+	} else {
+		if (argc > 2) {
+			ad.filename2 = g_strdup(argv[2]);
+		}
+		poseRoi.point.x = 0;
+		poseRoi.point.y = 0;
+		poseRoi.width = 0;
+		poseRoi.height = 0;
+	}
+
+	return appcore_efl_main(PACKAGE, &argc, &argv, &ops);
+}
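
For reference, the call sequence the new tensor-buffer path expects from a client boils down to the sketch below. This is a minimal illustration, not part of the patch itself: gesture_infer stands for an inference handle that has already been created, configured with the second-stage gesture model (as in perform_tflite_hand_detection_AIC2() above) and prepared, run_second_stage() is a hypothetical helper name, and the 56x56x21 FLOAT32 layout mirrors the heatmap tensor this testsuite hands from the first model to the second.

/* Hypothetical client-side sketch (not part of this patch): push the filtered
 * heatmaps produced by the first model into the second-stage gesture model
 * through the new tensor-buffer APIs. */
#include <stdio.h>
#include <mv_common.h>
#include <mv_inference.h>

static void pose_cb(mv_source_h source, const int number_of_pose_estimation,
                    const mv_point_s *locations, void *user_data)
{
	for (int n = 0; n < number_of_pose_estimation; ++n)
		printf("landmark %d: (%d, %d)\n", n, locations[n].x, locations[n].y);
}

/* 'gesture_infer' is assumed to be configured and prepared already;
 * 'heatmaps' is the raw 56x56x21 float output of the first model. */
int run_second_stage(mv_inference_h gesture_infer, float *heatmaps)
{
	mv_source_h src = NULL;
	int err = mv_create_source(&src);
	if (err != MEDIA_VISION_ERROR_NONE)
		return err;

	/* Wrap the raw tensor; width/height/channel/dimension follow the
	 * 56x56x21, dimension=3 layout used by stream_infer.c. */
	err = mv_source_fill_by_tensor_buffer(src, heatmaps,
					MV_INFERENCE_DATA_FLOAT32,
					56 * 56 * 21 * sizeof(float),
					56, 56, 21, 3);

	/* The scale factors map the model's coordinate space back to the
	 * 640x480 camera frame, as in _hand_detected_cb() above. */
	if (err == MEDIA_VISION_ERROR_NONE)
		err = mv_inference_pose_estimation_detect(src, gesture_infer, NULL,
					640.f, 480.f, pose_cb, NULL);

	mv_destroy_source(src);
	return err;
}

On the consuming side, mv_source_is_tensor() and mv_source_get_tensor_buffer() let the inference layer tell such a tensor-backed source apart from an ordinary image-backed one, which is what allows both fill paths to share the same mv_source_h handle.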