[Inference] Add new api mv_inference_pose_landmark_detect() and mv_pose_compare(... 29/240829/1 accepted/tizen/unified/20200813.015352 submit/tizen/20200812.013733
authorTae-Young Chung <ty83.chung@samsung.com>
Wed, 8 Jul 2020 07:26:25 +0000 (16:26 +0900)
committerTae-Young Chung <ty83.chung@samsung.com>
Wed, 12 Aug 2020 01:31:56 +0000 (01:31 +0000)
Change-Id: Ib17ebcd8ea506ebaa84c610bf7c0bbeeb6d27656
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
(cherry picked from commit 6a7f392fed10c1e7d982540fa76d93d4b5a04d9e)

19 files changed:
include/mv_inference.h
include/mv_inference_private.h [new file with mode: 0644]
include/mv_inference_type.h
mv_inference/inference/include/Bvh.h [new file with mode: 0644]
mv_inference/inference/include/BvhParser.h [new file with mode: 0644]
mv_inference/inference/include/BvhUtils.h [new file with mode: 0644]
mv_inference/inference/include/Inference.h
mv_inference/inference/include/Joint.h [new file with mode: 0644]
mv_inference/inference/include/Posture.h [new file with mode: 0644]
mv_inference/inference/include/mv_inference_open.h
mv_inference/inference/src/Bvh.cpp [new file with mode: 0644]
mv_inference/inference/src/BvhParser.cpp [new file with mode: 0644]
mv_inference/inference/src/BvhUtils.cpp [new file with mode: 0644]
mv_inference/inference/src/Inference.cpp
mv_inference/inference/src/Posture.cpp [new file with mode: 0644]
mv_inference/inference/src/mv_inference.c
mv_inference/inference/src/mv_inference_open.cpp
packaging/capi-media-vision.spec
test/testsuites/inference/inference_test_suite.c

index a07f2eb..5edb09e 100644 (file)
@@ -224,6 +224,7 @@ extern "C" {
  */
 #define MV_INFERENCE_CONFIDENCE_THRESHOLD "MV_INFERENCE_CONFIDENCE_THRESHOLD"
 
+
 /*************/
 /* Inference */
 /*************/
@@ -678,6 +679,234 @@ int mv_inference_facial_landmark_detect(
        void *user_data);
 
 /**
+ * @brief Called when poses in @a source are detected.
+ * @details This type callback is invoked each time when
+ *          mv_inference_pose_landmark_detect() is called to provide
+ *          the results of the pose landmark detection.
+ *
+ * @since_tizen 6.0
+ * @remarks The @a locations should not be released by app. They can be used only in the callback.
+ *
+ * @param[in] source               The handle to the source of the media where
+ *                                 landmarks were detected. @a source is the same object
+ *                                 for which mv_inference_pose_landmark_detect() was called.
+ *                                 It should be released by calling mv_destroy_source()
+ *                                 when it's not needed anymore.
+ * @param[in] locations            Locations of the detected pose landmarks.
+ * @param[in] label                Label indicating pose of the landmark. If a label is provided by
+ *                                 a model, a value larger than or equal to 0 is provided.
+ *                                 Otherwise, -1 is provided.
+ * @param[in] user_data            The user data passed from callback invoking code
+ *
+ * @see mv_inference_pose_landmark_detect()
+ */
+typedef void (*mv_inference_pose_landmark_detected_cb)(
+       mv_source_h source,
+       mv_inference_pose_result_h locations,
+       int label,
+       void *user_data);
+
+/**
+ * @brief Performs pose landmarks detection on the @a source.
+ * @details Use this function to launch pose landmark detection.
+ *          Each time when mv_inference_pose_landmark_detect() is
+ *          called, @a detected_cb will receive a list of pose landmark's locations
+ *          in the media source.
+ *
+ * @since_tizen 6.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] source         The handle to the source of the media
+ * @param[in] infer          The handle to the inference
+ * @param[in] roi            Rectangular area including a human body in @a source which
+ *                           will be analyzed. If NULL, then the whole source will be
+ *                           analyzed.
+ * @param[in] detected_cb    The callback which will receive the detection results.
+ * @param[in] user_data      The user data passed from the code where
+ *                           mv_inference_pose_landmark_detect() is invoked.
+ *                           This data will be accessible in @a detected_cb callback.
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_inference_create()
+ * @pre Configure an inference handle by calling mv_inference_configure()
+ * @pre Prepare an inference by calling mv_inference_prepare()
+ * @post @a detected_cb will be called to provide detection results
+ *
+ * @see mv_inference_pose_landmark_detected_cb()
+ */
+int mv_inference_pose_landmark_detect(
+       mv_source_h source,
+       mv_inference_h infer,
+       mv_rectangle_s *roi,
+       mv_inference_pose_landmark_detected_cb detected_cb,
+       void *user_data);
+
+/**
+ * @brief Gets the number of poses.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] result                   The handle to inference result
+ * @param[out] number_of_poses      The pointer to the number of poses
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @see mv_inference_pose_landmark_detected_cb()
+ * @see mv_inference_pose_result_h
+ */
+int mv_inference_pose_get_number_of_poses(
+       mv_inference_pose_result_h result, int *number_of_poses);
+
+/**
+ * @brief Gets the number of landmarks per a pose.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] result                           The handle to inference result
+ * @param[out] number_of_landmarks      The pointer to the number of landmarks
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @see mv_inference_pose_landmark_detected_cb()
+ * @see mv_inference_pose_result_h
+ */
+int mv_inference_pose_get_number_of_landmarks(
+       mv_inference_pose_result_h result, int *number_of_landmarks);
+
+/**
+ * @brief Gets landmark location of a part of a pose.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] result            The handle to inference result
+ * @param[in] pose_index        The pose index between 0 and
+ *                              the number of poses which can be gotten by
+ *                              mv_inference_pose_get_number_of_poses()
+ * @param[in] pose_part         The landmark index between 0 and
+ *                              the number of landmarks which can be gotten by
+ *                              mv_inference_pose_get_number_of_landmarks()
+ * @param[out] location         The location of a landmark
+ * @param[out] score            The score of a landmark
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @see mv_inference_pose_get_number_of_poses()
+ * @see mv_inference_pose_get_number_of_landmarks()
+ * @see mv_inference_pose_landmark_detected_cb()
+ * @see mv_inference_pose_result_h
+ */
+int mv_inference_pose_get_landmark(
+       mv_inference_pose_result_h result, int pose_index, int pose_part, mv_point_s *location, float *score);
+
+/**
+ * @brief Creates pose handle.
+ * @details Use this function to create a pose.
+ *
+ * @since_tizen 6.0
+ *
+ * @remarks The @a pose should be released using mv_pose_destroy().
+ *
+ * @param[out] pose    The handle to the pose to be created
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ *
+ * @see mv_pose_destroy()
+ */
+int mv_pose_create(mv_pose_h *pose);
+
+/**
+ * @brief Destroys pose handle and releases all its resources.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] pose    The handle to the pose to be destroyed
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ * @pre Create pose handle by using mv_pose_create()
+ *
+ * @see mv_pose_create()
+ */
+int mv_pose_destroy(mv_pose_h pose);
+
+/**
+ * @brief Sets a motion capture file and its pose mapping file to the pose.
+ * @details Use this function to set a motion capture file and
+ *          its pose mapping file. These are used by mv_pose_compare()
+ *          to compare with a pose detected by mv_inference_pose_landmark_detect().
+ *
+ *
+ * @since_tizen 6.0
+ * @remarks If the app sets paths to media storage,
+ *          then the media storage privilege
+ *          %http://tizen.org/privilege/mediastorage is needed.\n
+ *          If the app sets the paths to external storage,
+ *          then the external storage privilege
+ *          %http://tizen.org/privilege/externalstorage is needed.\n
+ *          If the required privileges aren't set properly,
+ *          mv_pose_set_from_file() will return #MEDIA_VISION_ERROR_PERMISSION_DENIED.
+ *
+ * @param[in] pose                  The handle to the pose
+ * @param[in] motion_capture_file_path The file path to the motion capture file
+ * @param[in] motion_mapping_file_path The file path to the motion mapping file
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of capture or mapping file
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ */
+int mv_pose_set_from_file(mv_pose_h pose, const char *motion_capture_file_path, const char *motion_mapping_file_path);
+
+/**
+ * @brief Compares an action pose with the pose which is set by mv_pose_set_from_file().
+ * @details Use this function to compare action pose with the pose
+ *          which is set by mv_pose_set_from_file().
+ *          Parts to be compared can be selected by #mv_inference_human_body_part_e.
+ *          Their similarity will be given by the score between 0 ~ 1.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] pose   The handle to the pose
+ * @param[in] action The action pose
+ * @param[in] parts  The parts to be compared
+ * @param[out] score The similarity score
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+ *
+ * @pre Sets the pose by using mv_pose_set_from_file()
+ * @pre Detects the pose by using mv_inference_pose_landmark_detect()
+ */
+int mv_pose_compare(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score);
+/**
  * @}
  */
 
diff --git a/include/mv_inference_private.h b/include/mv_inference_private.h
new file mode 100644 (file)
index 0000000..564bab8
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TIZEN_MEDIAVISION_INFERENCE_PRIVATE_H__
+#define __TIZEN_MEDIAVISION_INFERENCE_PRIVATE_H__
+
+#include <mv_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @file  mv_inference_private.h
+ * @brief This file contains internal structure for pose.
+ */
+
+/**
+  * @brief Position, score, and label for describing a landmark.
+  *
+  * @since_tizen 6.0
+  *
+  */
+typedef struct {
+    bool isAvailable;   /**< Availability of landmark */
+    mv_point_s point;   /**< 2D position of landmark */
+    const char *label;  /**< Label of landmark */
+    float score;        /**< Score of landmark */
+} mv_inference_landmark_s;
+
+/**
+  * @brief Landmarks of a pose.
+  *
+  * @since_tizen 6.0
+  *
+  */
+typedef struct {
+    int number_of_poses;
+    int number_of_landmarks_per_pose;            /**< The number of landmarks*/
+    mv_inference_landmark_s *landmarks; /**< A set of landmarks describing pose */
+} mv_inference_pose_s;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __TIZEN_MEDIAVISION_INFERENCE_PRIVATE_H__ */
index 0d00ae2..97a56ea 100644 (file)
@@ -17,6 +17,8 @@
 #ifndef __TIZEN_MEDIAVISION_INFERENCE_TYPE_H__
 #define __TIZEN_MEDIAVISION_INFERENCE_TYPE_H__
 
+#include <mv_common.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@@ -107,13 +109,67 @@ typedef enum {
 } mv_inference_data_type_e;
 
 /**
+  * @brief Enumeration for human pose landmark.
+  *
+  * @since_tizen 6.0
+  *
+  */
+typedef enum {
+    MV_INFERENCE_HUMAN_POSE_HEAD = 1,       /**< Head of human pose */
+    MV_INFERENCE_HUMAN_POSE_NECK,           /**< Neck of human pose */
+    MV_INFERENCE_HUMAN_POSE_THORAX,         /**< Thorax of human pose */
+    MV_INFERENCE_HUMAN_POSE_RIGHT_SHOULDER, /**< Right shoulder of human pose */
+    MV_INFERENCE_HUMAN_POSE_RIGHT_ELBOW,    /**< Right elbow of human pose */
+    MV_INFERENCE_HUMAN_POSE_RIGHT_WRIST,    /**< Right wrist of human pose */
+    MV_INFERENCE_HUMAN_POSE_LEFT_SHOULDER,  /**< Left shoulder of human pose */
+    MV_INFERENCE_HUMAN_POSE_LEFT_ELBOW,     /**< Left elbow of human pose */
+    MV_INFERENCE_HUMAN_POSE_LEFT_WRIST,     /**< Left wrist of human pose */
+    MV_INFERENCE_HUMAN_POSE_PELVIS,         /**< Pelvis of human pose */
+    MV_INFERENCE_HUMAN_POSE_RIGHT_HIP,      /**< Right hip of human pose */
+    MV_INFERENCE_HUMAN_POSE_RIGHT_KNEE,     /**< Right knee of human pose */
+    MV_INFERENCE_HUMAN_POSE_RIGHT_ANKLE,    /**< Right ankle of human pose */
+    MV_INFERENCE_HUMAN_POSE_LEFT_HIP,       /**< Left hip of human pose */
+    MV_INFERENCE_HUMAN_POSE_LEFT_KNEE,      /**< Left knee of human pose */
+    MV_INFERENCE_HUMAN_POSE_LEFT_ANKLE      /**< Left ankle of human pose */
+} mv_inference_human_pose_landmark_e;
+
+/**
+  * @brief Enumeration for human body parts.
+  *
+  * @since_tizen 6.0
+  *
+  */
+typedef enum {
+    MV_INFERENCE_HUMAN_BODY_PART_HEAD = 1 << 0,         /**< HEAD, NECK, and THORAX */
+    MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT = 1 << 1,    /**< RIGHT SHOULDER, ELBOW, and WRIST */
+    MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT = 1 << 2,     /**< LEFT SHOULDER, ELBOW, and WRIST */
+    MV_INFERENCE_HUMAN_BODY_PART_BODY = 1 << 3,         /**< THORAX, PELVIS, RIGHT HIP, and LEFT HIP */
+    MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT = 1 << 4,    /**< RIGHT HIP, KNEE, and ANKLE */
+    MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT = 1 << 5      /**< LEFT HIP, KNEE, and ANKLE */
+} mv_inference_human_body_part_e;
+
+/**
  * @brief The inference handle.
- *
+ *
  * @since_tizen 5.5
  */
 typedef void *mv_inference_h;
 
 /**
+ * @brief The inference pose result handle.
+ * @details Contains information about location of
+ *          detected landmarks for one or more poses.
+ *
+ * @since_tizen 6.0
+ */
+typedef void *mv_inference_pose_result_h;
+
+/**
+ * @brief The pose handle.
+ *
+ * @since_tizen 6.0
+ */
+typedef void *mv_pose_h;
+/**
  * @}
  */
 
diff --git a/mv_inference/inference/include/Bvh.h b/mv_inference/inference/include/Bvh.h
new file mode 100644 (file)
index 0000000..6b9b853
--- /dev/null
@@ -0,0 +1,108 @@
+#ifndef __MEDIA_VISION_BVH_H__
+#define __MEDIA_VISION_BVH_H__
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+#include "Joint.h"
+#include <memory>
+#include <vector>
+
+namespace mediavision
+{
+namespace inference
+{
+
+  /** Class created for storing motion data from bvh file */
+  class Bvh {
+  public:
+    /** Constructor of Bvh object
+     *  @details  Initializes local variables
+     */
+    Bvh() : num_frames_(0), frame_time_(0), num_channels_(0) {}
+
+    /**
+     * Recalculation of local transformation matrix for each frame in each joint
+     *
+     * Should be called to set local_transformation_matrix vectors in joints
+     * structures.
+     *
+     * @param start_joint  A joint of which each child local transformation
+     * matrix will be recalculated, as default it is NULL which will be resolved
+     * to root_joint in method body
+     */
+    void recalculate_joints_ltm(std::shared_ptr<Joint> start_joint = NULL);
+
+    /** Adds joint to Bvh object
+     *  @details  Adds joint and increases number of data channels
+     *  @param  joint  The joint that will be added
+     */
+    void add_joint(const std::shared_ptr<Joint> joint) {
+      joints_.push_back(joint);
+      num_channels_ += joint->num_channels();
+    }
+
+    /** Gets the root joint
+     *  @return  The root joint
+     */
+    const std::shared_ptr<Joint> root_joint() const { return root_joint_; }
+
+    /** Gets all joints
+     *  @return  The all joints
+     */
+    const std::vector <std::shared_ptr <Joint>> joints() const {
+      return joints_;
+    }
+
+    /** Gets the number of data frames
+     *  @return  The number of frames
+     */
+    unsigned num_frames() const { return num_frames_; }
+
+    /** Gets the frame time
+     *  @return  The single frame time (in second)
+     */
+    double frame_time() const { return frame_time_; }
+
+    /** Gets the total number of channels
+     *  @return  The number of data channels
+     */
+    unsigned num_channels() const { return num_channels_; }
+
+    /** Sets the root joint
+     *  @param  arg  The root joint to be set
+     */
+    void set_root_joint(const std::shared_ptr<Joint> arg) { root_joint_ = arg; }
+
+    /** Sets the all joint at once
+     *  @param  arg  The all joints to be set
+     */
+    void set_joints(const std::vector <std::shared_ptr <Joint>> arg) {
+      joints_ = arg;
+    }
+
+    /** Sets the number of data frames
+     *  @param  arg  The number of frames to be set
+     */
+    void set_num_frames(const unsigned arg) { num_frames_ = arg; }
+
+    /** Sets the single data frame time
+     *  @param  arg  The time of frame to be set
+     */
+    void set_frame_time(const double arg) { frame_time_ = arg; }
+
+  private:
+    /** A root joint in this bvh file */
+    std::shared_ptr<Joint> root_joint_;
+    /** All joints in file in order of parse */
+    std::vector <std::shared_ptr <Joint>> joints_;
+    /** A number of motion frames in this bvh file */
+    unsigned num_frames_;
+    /** A time of single frame */
+    double frame_time_;
+    /** Number of channels of all joints */
+    unsigned num_channels_;
+  };
+
+} // namespace
+}
+#endif  // __MEDIA_VISION_BVH_H__
diff --git a/mv_inference/inference/include/BvhParser.h b/mv_inference/inference/include/BvhParser.h
new file mode 100644 (file)
index 0000000..a7c03fd
--- /dev/null
@@ -0,0 +1,75 @@
+#ifndef __MEDIA_VISION_BVH_PARSER_H__
+#define __MEDIA_VISION_BVH_PARSER_H__
+
+#include "Bvh.h"
+#include "Joint.h"
+
+#include <algorithm>
+#include <functional>
+#include <locale>
+#include <memory>
+
+namespace mediavision
+{
+namespace inference
+{
+
+    /** Bvh Parser class that is responsible for parsing .bvh file */
+    class BvhParser {
+    public:
+    /** Parses single bvh file and stored data into bvh structure
+     *  @param  path  The path to file to be parsed
+     *  @param  bvh   The pointer to bvh object where parsed data will be stored
+     *  @return  0 if success, -1 otherwise
+     */
+    int parse(const std::string& path, Bvh* bvh);
+
+    private:
+    /** Parses single hierarchy in bvh file
+     *  @param  file  The input stream that is needed for reading file content
+     *  @return  0 if success, -1 otherwise
+     */
+    int parse_hierarchy(std::ifstream& file);
+
+    /** Parses joint and its children in bvh file
+     *  @param  file    The input stream that is needed for reading file content
+     *  @param  parent  The pointer to parent joint
+     *  @param  parsed  The output parameter, here will be stored parsed joint
+     *  @return  0 if success, -1 otherwise
+     */
+    int parse_joint(std::ifstream& file, std::shared_ptr <Joint> parent,
+        std::shared_ptr <Joint>& parsed);
+
+    /** Parses order of channel for single joint
+     *  @param  file    The input stream that is needed for reading file content
+     *  @param  joint   The pointer to joint that channels order will be parsed
+     *  @return  0 if success, -1 otherwise
+     */
+    int parse_channel_order(std::ifstream& file, std::shared_ptr <Joint> joint);
+
+    /** Parses motion part data
+     *  @param  file    The input stream that is needed for reading file content
+     *  @return  0 if success, -1 otherwise
+     */
+    int parse_motion(std::ifstream& file);
+
+    /** Trims the string, removes leading and trailing whitespace from it
+     *  @param  s   The string, which leading and trailing whitespace will be
+     *              trimmed
+     */
+    inline void trim(std::string &s) {
+        s.erase( std::remove_if( s.begin(), s.end(),
+            std::bind( std::isspace<char>, std::placeholders::_1,
+            std::locale::classic() ) ), s.end() );
+    }
+
+
+    /** The path to file that was parsed previously */
+    std::string path_;
+
+    /** The bvh object to store parsed data */
+    Bvh* bvh_;
+    };
+}
+} // namespace
+#endif  // __MEDIA_VISION_BVH_PARSER_H__
diff --git a/mv_inference/inference/include/BvhUtils.h b/mv_inference/inference/include/BvhUtils.h
new file mode 100644 (file)
index 0000000..9d1a131
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef __MEDIA_VISION_BVH_UTILS_H__
+#define __MEDIA_VISION_BVH_UTILS_H__
+
+#include <cmath>
+#include <limits>
+
+#include <iostream>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+namespace mediavision
+{
+namespace inference
+{
+
+/** Enumeration class for axis */
+enum class Axis {
+  X,
+  Y,
+  Z
+};
+
+/** Creates rotation matrix
+ *  @param  angle  The rotation angle
+ *  @param  axis   The rotation axis
+ *  @return  The rotation matrix
+ */
+cv::Mat rotation_matrix(float angle, Axis axis);
+
+/** Rotates matrix
+ *  @param  matrix  The matrix to be rotated
+ *  @param  angle   The rotation angle
+ *  @param  axis    The rotation axis
+ *  @return  The rotation matrix
+ */
+cv::Mat rotate(cv::Mat matrix, float angle, Axis axis);
+
+} // namespace
+}
+#endif //__MEDIA_VISION_BVH_UTILS_H__
\ No newline at end of file
index 4c8204b..cd7c7ab 100644 (file)
 #include "mv_common.h"
 #include "inference_engine_error.h"
 #include "inference_engine_common_impl.h"
+#include "mv_inference_private.h"
 #include <mv_inference_type.h>
 #include <opencv2/core.hpp>
 #include <opencv2/imgproc.hpp>
 
+#define POSE_MAX_LANDMARKS 16
+#define POSE_MAX_PARTS 6
+
 /**
  * @file Inference.h
  * @brief This file contains the inference class definition which
@@ -60,6 +64,13 @@ typedef struct _FacialLandMarkDetectionResults {
        std::vector<cv::Point> locations;
 } FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
 
+typedef struct _PoseLandmarkDetectionResults {
+       int number_of_landmarks;
+       std::vector<cv::Point2f> locations;
+       std::vector<float> score;
+} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */
+
+
 namespace mediavision
 {
 namespace inference
@@ -142,7 +153,7 @@ namespace inference
                                                                 double stdValue, double meanValue);
 
                /**
-                * @brief Configure input infomation
+                * @brief Configure input information
                 *
                 * @since_tizen 6.0
                 */
@@ -289,6 +300,15 @@ namespace inference
                 */
                int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results);
 
+               /**
+                * @brief       Gets the PoseLandmarkDetectionResults
+                *
+                * @since_tizen 6.0
+                * @return @c true on success, otherwise a negative error value
+                */
+               int GetPoseLandmarkDetectionResults(mv_inference_pose_result_h *detectionResults,
+                                                                               int width, int height);
+
                int GetResults(std::vector<std::vector<int> > *dimInfo,
                                           std::vector<float *> *results);
 
@@ -325,6 +345,8 @@ namespace inference
                std::vector<inference_engine_tensor_buffer> mOutputTensorBuffers;
                inference_engine_layer_property mOutputLayerProperty;
 
+               mv_inference_pose_s *mPoseResult;
+
        private:
                void CheckSupportedInferenceBackend();
                int ConvertEngineErrorToVisionError(int error);
diff --git a/mv_inference/inference/include/Joint.h b/mv_inference/inference/include/Joint.h
new file mode 100644 (file)
index 0000000..d28a70c
--- /dev/null
@@ -0,0 +1,232 @@
+#ifndef __MEDIA_VISION_JOINT_H__
+#define __MEDIA_VISION_JOINT_H__
+
+#include <opencv2/core.hpp>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace mediavision
+{
+namespace inference
+{
+
+  /** Class created for storing single joint data from bvh file */
+  class Joint {
+  public:
+    /** A struct that keep offset of joint in relation to parent */
+    struct Offset {
+      float x;
+      float y;
+      float z;
+    };
+
+    /** A enumeration type useful for set order of channels for every joint */
+    enum class Channel {
+      XPOSITION,
+      YPOSITION,
+      ZPOSITION,
+      ZROTATION,
+      XROTATION,
+      YROTATION
+    };
+
+    /** A string names for each channel */
+    const std::vector<std::string> channel_name_str = {
+      "XPOSITION",
+      "YPOSITION",
+      "ZPOSITION",
+      "ZROTATION",
+      "XROTATION",
+      "YROTATION"
+    };
+
+    /** Adds single frame motion data
+     *  @param  data    The motion data to be added
+     */
+    void add_frame_motion_data(const std::vector <float>& data) {
+      channel_data_.push_back(data);
+    }
+
+    /** Gets the parent joint of this joint
+     *  @return  The parent joint
+     */
+    std::shared_ptr <Joint> parent() const { return parent_; }
+
+    /** Gets the name of this joint
+     *  @return  The joint's name
+     */
+    std::string name() const { return name_; }
+
+    /** Gets the offset of this joint
+     *  @return  The joint's offset
+     */
+    Offset offset() const { return offset_; }
+
+    /** Gets the channels order of this joint
+     *  @return  The joint's channels order
+     */
+    std::vector <Channel> channels_order() const {
+      return channels_order_;
+    }
+
+    /** Gets the all children joints of this joint
+     *  @return  The joint's children
+     */
+    std::vector <std::shared_ptr <Joint>> children() const {
+      return children_;
+    }
+
+    /** Gets the channels data of this joint for all frames
+     *  @return  The joint's channel data
+     */
+    const std::vector <std::vector <float>>& channel_data() const {
+      return channel_data_;
+    }
+
+    /** Gets the channel data of this joint for selected frame
+     *  @param   frame   The frame for which channel data will be returned
+     *  @return  The joint's channel data for selected frame
+     */
+    const std::vector <float>& channel_data(unsigned frame) const {
+      return channel_data_[frame];
+    }
+
+    /** Gets the channel data of this joint for selected frame and channel
+     *  @param   frame        The frame for which channel data will be returned
+     *  @param   channel_num  The number of channel which data will be returned
+     *  @return  The joint's channel data for selected frame and channel
+     */
+    float channel_data(unsigned frame, unsigned channel_num) const {
+      return channel_data_[frame][channel_num];
+    }
+
+    /** Gets the local transformation matrix for this joint for all frames
+     *  @return  The joint's local transformation matrix
+     */
+    std::vector <cv::Mat> ltm() const {
+      return ltm_;
+    }
+
+    /** Gets the local transformation matrix for this joint for selected frame
+     *  @param   frame    The frame for which ltm will be returned
+     *  @return  The joint's local transformation matrix for selected frame
+     */
+    cv::Mat ltm(unsigned frame) const {
+      return ltm_[frame];
+    }
+
+    /** Gets the position for this joint for all frames
+     *  @return  The joint's position
+     */
+    std::vector <cv::Vec3f> pos() const {
+      return pos_;
+    }
+
+    /** Gets the position for this joint for selected frame
+     *  @param   frame    The frame for which ltm will be returned
+     *  @return  The joint's position for selected frame
+     */
+    cv::Vec3f pos(unsigned frame) const {
+      return pos_[frame];
+    }
+
+    /** Gets the number of channels of this joint
+     *  @return  The joint's channels number
+     */
+    unsigned num_channels() const { return channels_order_.size(); }
+
+    /** Sets the this joint parent joint
+     *  @param   arg    The parent joint of this joint
+     */
+    void set_parent(const std::shared_ptr <Joint> arg) { parent_ = arg; }
+
+    /** Sets the this joint name
+     *  @param   arg    The name of this joint
+     */
+    void set_name(const std::string arg) { name_ = arg; }
+
+    /** Sets the this joint offset
+     *  @param   arg    The offset of this joint
+     */
+    void set_offset(const Offset arg) { offset_ = arg; }
+
+    /** Sets the this joint channels order
+     *  @param   arg    The channels order of this joint
+     */
+    void set_channels_order(const std::vector <Channel>& arg) {
+      channels_order_ = arg;
+    }
+
+    /** Sets the this joint children
+     *  @param   arg    The children of this joint
+     */
+    void set_children(const std::vector <std::shared_ptr <Joint>>& arg) {
+      children_ = arg;
+    }
+
+    /** Sets the this joint channels data
+     *  @param   arg    The channels data of this joint
+     */
+    void set_channel_data(const std::vector <std::vector <float>>& arg) {
+      channel_data_ = arg;
+    }
+
+    /** Sets local transformation matrix for selected frame
+     *  @param  matrix  The local transformation matrix to be set
+     *  @param  frame   The number of frame for which you want set ltm. As
+     *                  default it is set to 0.
+     */
+    void set_ltm(const cv::Mat matrix, unsigned frame = 0) {
+      if (frame > 0 && frame < ltm_.size())
+        ltm_[frame] = matrix;
+      else
+        ltm_.push_back(matrix);
+    }
+
+    /** Sets local transformation matrix for selected frame
+     *  @param  pos     The position of joint in selected frame to be set
+     *  @param  frame   The number of frame for which you want set position. As
+     *                  default it is set to 0.
+     */
+    void set_pos(const cv::Vec3f pos, unsigned frame = 0) {
+      if (frame > 0 && frame < pos_.size())
+        pos_[frame] = pos;
+      else
+        pos_.push_back(pos);
+    }
+
+    /** Gets channels name of this joint
+     *  @return The joint's channels name
+     */
+    const std::vector<std::string> get_channels_name() const {
+      std::vector<std::string> channel_names;
+
+      for (int i = 0; i < channels_order_.size(); i++)
+        channel_names.push_back(channel_name_str[static_cast<int>(
+            channels_order_[i])]);
+
+      return channel_names;
+    }
+
+  private:
+    /** Parent joint in file hierarchy */
+    std::shared_ptr <Joint> parent_;
+    std::string name_;
+    Offset offset_;
+    /** Order of joint's input channels */
+    std::vector <Channel> channels_order_;
+    /** Pointers to joints that are children of this in hierarchy */
+    std::vector <std::shared_ptr <Joint>> children_;
+    /** Structure for keep joint's channel's data.
+     *  Each vector keep data for one channel.
+     */
+    std::vector <std::vector <float> > channel_data_;
+    /** Local transformation matrix for each frame */
+    std::vector <cv::Mat> ltm_;
+    /** Vector x, y, z of joint position for each frame */
+    std::vector <cv::Vec3f> pos_;
+  };
+}
+} // namespace
+#endif // __MEDIA_VISION_JOINT_H__
diff --git a/mv_inference/inference/include/Posture.h b/mv_inference/inference/include/Posture.h
new file mode 100644 (file)
index 0000000..4c67fdc
--- /dev/null
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_POSE_H__
+#define __MEDIA_VISION_POSE_H__
+
+#include <string>
+#include <map>
+
+#include "mv_common.h"
+#include "Inference.h"
+#include "Bvh.h"
+#include "BvhParser.h"
+#include <mv_inference_type.h>
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+/**
+ * @file Posture.h
+ * @brief This file contains the pose class definition
+ */
+
+namespace mediavision
+{
+namespace inference
+{
+       class Posture
+       {
+       public:
+               /**
+                * @brief   Creates a Posture class instance.
+                *
+                * @since_tizen 6.0
+                */
+               Posture();
+
+               /**
+                * @brief   Destroys a Posture class instance including
+                *           its all resources.
+                *
+                * @since_tizen 6.0
+                */
+               ~Posture();
+
+               /**
+                * @brief   Sets the reference pose from a motion capture (.bvh)
+                *          file and its motion-to-pose mapping file.
+                *
+                * @since_tizen 6.0
+                */
+               int setPoseFromFile(const std::string motionCaptureFilePath,
+                                                       const std::string motionMappingFilePath);
+
+               /**
+                * @brief   Compares @a action against the reference pose for the
+                *          selected @a parts and returns a similarity score.
+                *
+                * @since_tizen 6.0
+                */
+               int compare(int parts, std::vector<std::pair<bool, cv::Point>> action,
+                                       float* score);
+
+       private:
+        cv::Vec2f getUnitVectors(cv::Point point1, cv::Point point2);
+               int getParts(int parts,
+                                       std::vector<std::pair<bool, cv::Point>>& pose,
+                                       std::vector<std::pair<bool, std::vector<cv::Vec2f>>>& posePart);
+               float getSimilarity(int parts,
+                                       std::vector<std::pair<bool, std::vector<cv::Vec2f>>>& posePart,
+                                       std::vector<std::pair<bool, std::vector<cv::Vec2f>>>& actionPart);
+               float cosineSimilarity(std::vector<cv::Vec2f> vec1, std::vector<cv::Vec2f> vec2, int size);
+
+       private:
+        BvhParser mBvhParser;
+        Bvh mBvh;
+               std::map<std::string, int> mMotionToPoseMap; /**< name, index */
+               std::vector<std::pair<bool, cv::Point>> mPose;
+               std::vector<std::pair<bool, std::vector<cv::Vec2f>>> mPoseParts;
+       };
+
+} /* inference */
+} /* mediavision */
+
+#endif /* __MEDIA_VISION_POSE_H__ */
index 3e3225f..e72aaa2 100644 (file)
@@ -491,6 +491,190 @@ extern "C"
                        mv_inference_facial_landmark_detected_cb detected_cb,
                        void *user_data);
 
+       /**
+        * @brief Performs pose landmarks detection on the @a source.
+        * @details Use this function to launch pose landmark detection.
+        *          Each time when mv_inference_pose_landmark_detect_open() is
+        *          called, @a detected_cb will receive a list of pose landmarks' locations
+        *          in the media source.
+        *
+        * @since_tizen 6.0
+        * @remarks This function is synchronous and may take considerable time to run.
+        *
+        * @param[in] source         The handle to the source of the media
+        * @param[in] infer          The handle to the inference
+        * @param[in] roi            Rectangular area including a human body in @a source which
+        *                           will be analyzed. If NULL, then the whole source will be
+        *                           analyzed.
+        * @param[in] detected_cb    The callback which will receive the detection results.
+        * @param[in] user_data      The user data passed from the code where
+        *                           mv_inference_pose_landmark_detect_open() is invoked.
+        *                           This data will be accessible in @a detected_cb callback.
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INTERNAL          Internal error
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+        *                                                  isn't supported
+        *
+        * @pre Create a source handle by calling mv_create_source_open()
+        * @pre Create an inference handle by calling mv_inference_create_open()
+        * @pre Configure an inference handle by calling mv_inference_configure_open()
+        * @pre Prepare an inference by calling mv_inference_prepare_open()
+        * @post @a detected_cb will be called to provide detection results
+        *
+        * @see mv_inference_pose_landmark_detected_cb()
+        */
+       int mv_inference_pose_landmark_detect_open(
+                       mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
+                       mv_inference_pose_landmark_detected_cb detected_cb,
+                       void *user_data);
+
+/**
+ * @brief Gets the number of pose.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] result                   The handle to inference result
+ * @param[in, out] number_of_poses  The pointer to the number of poses
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ */
+int mv_inference_pose_get_number_of_poses_open(
+       mv_inference_pose_result_h result, int *number_of_poses);
+
+/**
+ * @brief Gets the number of landmark per a pose.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] result                           The handle to inference result
+ * @param[in, out] number_of_landmarks  The pointer to the number of landmarks
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ */
+int mv_inference_pose_get_number_of_landmarks_open(
+       mv_inference_pose_result_h result, int *number_of_landmarks);
+
+/**
+ * @brief Gets landmark location of a part of a pose.
+ *
+ * @since_tizen 6.0
+ *
+ * @param[in] result           The handle to inference result
+ * @param[in] pose_index               The pose index
+ * @param[in] part_index       The index of a landmark (part) within the pose
+ * @param[in, out] location            The location of a landmark
+ * @param[in, out] score               The score of a landmark
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ *
+ */
+int mv_inference_pose_get_landmark_open(
+       mv_inference_pose_result_h result, int pose_index, int part_index, mv_point_s *location, float *score);
+
+       /**
+        * @brief Creates pose handle.
+        * @details Use this function to create a pose.
+        *
+        * @since_tizen 6.0
+        *
+        * @remarks The @a pose should be released using mv_pose_destroy_open().
+        *
+        * @param[out] pose     The handle to the pose to be created
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+        *
+        * @see mv_pose_destroy_open()
+        */
+       int mv_pose_create_open(mv_pose_h *pose);
+
+       /**
+        * @brief Destroys pose handle and releases all its resources.
+        *
+        * @since_tizen 6.0
+        *
+        * @param[in] pose    The handle to the pose to be destroyed
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        *
+        * @pre Create pose handle by using mv_pose_create_open()
+        *
+        * @see mv_pose_create_open()
+        */
+       int mv_pose_destroy_open(mv_pose_h pose);
+
+       /**
+        * @brief Sets a motion capture file and its pose mapping file to the pose.
+        * @details Use this function to set a motion capture file and
+        *          its pose mapping file. These are used by mv_pose_compare_open()
+        *          to compare a given pose by mv_inference_pose_landmark_detect_open().
+        *
+        * @since_tizen 6.0
+        * @remarks If the app sets paths to media storage,
+        *          then the media storage privilege
+        *          %http://tizen.org/privilege/mediastorage is needed.\n
+        *          If the app sets the paths to external storage,
+        *          then the external storage privilege
+        *          %http://tizen.org/privilege/externalstorage is needed.\n
+        *          If the required privileges aren't set properly,
+        *          mv_pose_set_from_file_open() will return #MEDIA_VISION_ERROR_PERMISSION_DENIED.
+        *
+        * @param[in] pose                  The handle to the pose
+        * @param[in] motionCaptureFilePath The file path to the motion capture file
+        * @param[in] motionMappingFilePath The file path to the motion mapping file
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of file paths
+        * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+        */
+       int mv_pose_set_from_file_open(mv_pose_h pose, const char *motionCaptureFilePath, const char *motionMappingFilePath);
+
+       /**
+        * @brief Compares an action pose with the pose which is set by mv_pose_set_from_file_open().
+        * @details Use this function to compare action pose with the pose
+        *          which is set by mv_pose_set_from_file_open().
+        *          Parts to be compared can be selected by #mv_inference_human_body_part_e.
+        *          Their similarity can be given by the score between 0 ~ 1.
+        *
+        * @since_tizen 6.0
+        *
+        * @param[in] pose   The handle to the pose
+        * @param[in] action The action pose
+        * @param[in] parts  The parts to be compared
+        * @param[out] score The similarity score
+        *
+        * @return @c 0 on success, otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_NONE Successful
+        * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+        * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+        * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
+        *
+        * @pre Sets the pose by using mv_pose_set_from_file()
+        */
+       int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/mv_inference/inference/src/Bvh.cpp b/mv_inference/inference/src/Bvh.cpp
new file mode 100644 (file)
index 0000000..80d75df
--- /dev/null
@@ -0,0 +1,96 @@
+/**
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "Bvh.h"
+#include "BvhUtils.h"
+#include "mv_private.h"
+#include <iostream>
+
+namespace mediavision
+{
+namespace inference
+{
+
+  /** Recomputes the local transformation matrix (ltm) and world position of
+   *  @a start_joint for every frame, then recurses into all of its children.
+   *  Passing NULL starts the traversal at the root joint; if no root joint is
+   *  set the call is a no-op.
+   */
+  void Bvh::recalculate_joints_ltm(std::shared_ptr<Joint> start_joint) {
+
+    LOGI("ENTER");
+
+    if (start_joint == NULL)
+    {
+      if (root_joint_ == NULL)
+        return;
+      else
+        start_joint = root_joint_;
+    }
+
+    LOGD("%s joint", start_joint->name().c_str());
+    //LOG(DEBUG) << "recalculate_joints_ltm: " << start_joint->name();
+    // 4x4 translation matrix holding this joint's constant OFFSET (column 3).
+    cv::Mat offmat_backup = cv::Mat::eye(4,4, CV_32F);
+    offmat_backup.at<float>(0,3) = start_joint->offset().x;
+    offmat_backup.at<float>(1,3) = start_joint->offset().y;
+    offmat_backup.at<float>(2,3) = start_joint->offset().z;
+
+    std::vector<std::vector<float>> data = start_joint->channel_data();
+
+    for (int i = 0; i < num_frames_; i++) {
+      // NOTE(review): cv::Mat copy construction shares the underlying data
+      // with offmat_backup; safe here only because offmat is never written.
+      cv::Mat offmat = offmat_backup; // offset matrix
+      cv::Mat rmat = cv::Mat::eye(4,4,CV_32F);  // identity matrix set on rotation matrix
+      cv::Mat tmat = cv::Mat::eye(4,4,CV_32F);  // identity matrix set on translation matrix
+
+      // Fill tmat/rmat from this frame's channel values, in channel order.
+      for (int j = 0;  j < start_joint->channels_order().size(); j++) {
+        if (start_joint->channels_order()[j] == Joint::Channel::XPOSITION)
+          tmat.at<float>(0,3) = data[i][j];
+        else if (start_joint->channels_order()[j] == Joint::Channel::YPOSITION)
+          tmat.at<float>(1,3) = data[i][j];
+        else if (start_joint->channels_order()[j] == Joint::Channel::ZPOSITION)
+          tmat.at<float>(2,3) = data[i][j];
+        else if (start_joint->channels_order()[j] == Joint::Channel::XROTATION)
+          rmat = rotate(rmat, data[i][j], Axis::X);
+        else if (start_joint->channels_order()[j] == Joint::Channel::YROTATION)
+          rmat = rotate(rmat, data[i][j], Axis::Y);
+        else if (start_joint->channels_order()[j] == Joint::Channel::ZROTATION)
+          rmat = rotate(rmat, data[i][j], Axis::Z);
+      }
+
+      cv::Mat ltm = cv::Mat::eye(4,4,CV_32F); // local transformation matrix
+
+      // NOTE(review): only the root joint applies its translation channels
+      // (tmat); child joints use the parent's ltm plus their offset — confirm
+      // this is the intended handling of non-root position channels.
+      if (start_joint->parent() != NULL)
+        ltm = start_joint->parent()->ltm(i) * offmat;
+      else
+        ltm = tmat * offmat;
+
+      cv::Vec3f wPos(ltm.at<float>(0,3),ltm.at<float>(1,3), ltm.at<float>(2,3));
+      // set_pos() appends, so positions accumulate in frame order (i = 0..n-1).
+      start_joint->set_pos(wPos);
+      //LOG(TRACE) << "Joint world position: " << utils::vec3tos(ltm[3]);
+      LOGD("Joint world position: %f, %f, %f", wPos[0], wPos[1], wPos[2]);
+
+      ltm = ltm * rmat;
+
+      //LOG(TRACE) << "Local transformation matrix: \n" << utils::mat4tos(ltm);
+
+      start_joint->set_ltm(ltm, i);
+    } // num frame
+
+    for (auto& child : start_joint->children()) {
+      recalculate_joints_ltm(child);
+    }
+
+    LOGI("LEAVE");
+  } // recalculate_joints_ltm
+
+}  // end of bvh
+}
diff --git a/mv_inference/inference/src/BvhParser.cpp b/mv_inference/inference/src/BvhParser.cpp
new file mode 100644 (file)
index 0000000..6205c83
--- /dev/null
@@ -0,0 +1,397 @@
+/**
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BvhParser.h"
+#include "mv_private.h"
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+
+/** Indicate whether bvh parser allows multi hierarchy or not
+  * Not fully tested
+  */
+#define MULTI_HIERARCHY 0
+
+namespace {
+
+const std::string kChannels = "CHANNELS";
+const std::string kEnd = "End";
+const std::string kEndSite = "End Site";
+const std::string kFrame = "Frame";
+const std::string kFrames = "Frames:";
+const std::string kHierarchy = "HIERARCHY";
+const std::string kJoint = "JOINT";
+const std::string kMotion = "MOTION";
+const std::string kOffset = "OFFSET";
+const std::string kRoot = "ROOT";
+
+const std::string kXpos = "Xposition";
+const std::string kYpos = "Yposition";
+const std::string kZpos = "Zposition";
+const std::string kXrot = "Xrotation";
+const std::string kYrot = "Yrotation";
+const std::string kZrot = "Zrotation";
+
+}
+
+namespace mediavision
+{
+namespace inference
+{
+
+  //##############################################################################
+  // Main parse function
+  //##############################################################################
+  /** Opens the .bvh file at @a path and parses it into @a bvh.
+   *  The file must begin with the HIERARCHY keyword.
+   *  @return 0 on success, -1 on open failure or malformed structure.
+   */
+  int BvhParser::parse(const std::string& path, Bvh* bvh) {
+    LOGI("ENTER");
+    //LOG(INFO) << "Parsing file : " << path;
+
+    path_ = path;
+    bvh_ = bvh;
+
+    std::ifstream file;
+    file.open(path_);
+
+    if (file.is_open()) {
+      std::string token;
+
+  #if MULTI_HIERARCHY == 1
+      while (file.good()) {
+  #endif
+        file >> token;
+        if (token == kHierarchy) {
+          int ret = parse_hierarchy(file);
+          if (ret)
+            return ret;
+        } else {
+          //LOG(ERROR) << "Bad structure of .bvh file. " << kHierarchy
+          //          << " should be on the top of the file";
+          return -1;
+        }
+  #if MULTI_HIERARCHY == 1
+      }
+  #endif
+    } else {
+      //LOG(ERROR) << "Cannot open file to parse : " << path_;
+      return -1;
+    }
+
+    LOGI("LEAVE");
+    return 0;
+  }
+
+  //##############################################################################
+  // Function parsing hierarchy
+  //##############################################################################
+  /** Parses the HIERARCHY section: a single ROOT joint subtree followed by
+   *  the MOTION section.
+   *  @return 0 on success, -1 on malformed input.
+   */
+  int BvhParser::parse_hierarchy(std::ifstream& file) {
+    //LOG(INFO) << "Parsing hierarchy";
+
+    std::string token;
+    int ret;
+
+    if (file.good()) {
+      file >> token;
+
+      //##########################################################################
+      // Parsing joints
+      //##########################################################################
+      if (token == kRoot) {
+        std::shared_ptr <Joint> rootJoint;
+        ret = parse_joint(file, nullptr, rootJoint);
+
+        if (ret)
+          return ret;
+
+        LOGI("There is %d data channels", bvh_->num_channels());
+
+        bvh_->set_root_joint(rootJoint);
+      } else {
+        LOGE("Bad structure of .bvh file.");
+
+        return -1;
+      }
+    }
+
+    if (file.good()) {
+      file >> token;
+
+      //##########################################################################
+      // Parsing motion data
+      //##########################################################################
+      if (token == kMotion) {
+        ret = parse_motion(file);
+
+        if (ret)
+          return ret;
+      } else {
+        LOGE("Bad structure of .bvh file.");
+
+        return -1;
+      }
+    }
+    // NOTE(review): if the stream goes bad before MOTION is read, this falls
+    // through and returns 0 (success) with no motion data — confirm intended.
+    return 0;
+  }
+
+  //##############################################################################
+  // Function parsing joint
+  //##############################################################################
+  /** Parses one ROOT/JOINT block: joint name, OFFSET, CHANNELS, then any
+   *  number of child JOINTs and End Sites until the closing '}'. The built
+   *  joint is returned through @a parsed and registered with the Bvh object.
+   *  @return 0 on success, -1 on malformed input or premature end of file.
+   */
+  int BvhParser::parse_joint(std::ifstream& file,
+      std::shared_ptr <Joint> parent, std::shared_ptr <Joint>& parsed) {
+
+    //LOG(TRACE) << "Parsing joint";
+
+    std::shared_ptr<Joint> joint = std::make_shared<Joint>();
+    joint->set_parent(parent);
+
+    std::string name;
+    file >> name;
+
+    LOGD("Joint name %s", name.c_str());
+
+    joint->set_name(name);
+
+    std::string token;
+    std::vector <std::shared_ptr <Joint>> children;
+    int ret;
+
+    file >> token;  // Consuming '{'
+    file >> token;
+
+    //############################################################################
+    // Offset parsing
+    //############################################################################
+    if (token == kOffset) {
+      Joint::Offset offset;
+
+      // NOTE(review): operator>> only throws when exceptions are enabled on
+      // the stream; otherwise a failed read is silent — confirm intent.
+      try {
+        file >> offset.x >> offset.y >> offset.z;
+      } catch (const std::ios_base::failure& e) {
+        //LOG(ERROR) << "Failure while parsing offset";
+        return -1;
+      }
+
+      joint->set_offset(offset);
+
+      //LOG(TRACE) << "Offset x: " << offset.x << ", y: " << offset.y << ", z: "
+      //          << offset.z;
+
+    } else {
+      //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kOffset << ", but "
+      //          << "found \"" << token << "\"";
+
+      return -1;
+    }
+
+    file >> token;
+
+    //############################################################################
+    // Channels parsing
+    //############################################################################
+    if (token == kChannels) {
+      ret = parse_channel_order(file, joint);
+
+      //LOG(TRACE) << "Joint has " << joint->num_channels() << " data channels";
+
+      if (ret)
+        return ret;
+    } else {
+      //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kChannels
+      //           << ", but found \"" << token << "\"";
+
+      return -1;
+    }
+
+    file >> token;
+
+    bvh_->add_joint(joint);
+
+    //############################################################################
+    // Children parsing
+    //############################################################################
+
+    while (file.good()) {
+      //##########################################################################
+      // Child joint parsing
+      //##########################################################################
+      if (token == kJoint) {
+        std::shared_ptr <Joint> child;
+        ret = parse_joint(file, joint, child);
+
+        if (ret)
+          return ret;
+
+        children.push_back(child);
+
+      //##########################################################################
+      // End Site parsing
+      //##########################################################################
+      } else if (token == kEnd) {
+        file >> token >> token;  // Consuming "Site {"
+
+        std::shared_ptr <Joint> tmp_joint = std::make_shared <Joint> ();
+
+        tmp_joint->set_parent(joint);
+        tmp_joint->set_name(kEndSite);
+        children.push_back(tmp_joint);
+
+        file >> token;
+
+        //########################################################################
+        // End site offset parsing
+        //########################################################################
+        if (token == kOffset) {
+          Joint::Offset offset;
+
+          try {
+            file >> offset.x >> offset.y >> offset.z;
+          } catch (const std::ios_base::failure& e) {
+            //LOG(ERROR) << "Failure while parsing offset";
+            return -1;
+          }
+
+          tmp_joint->set_offset(offset);
+
+          // LOG(TRACE) << "Joint name : EndSite";
+          // LOG(TRACE) << "Offset x: " << offset.x << ", y: " << offset.y << ", z: "
+          //           << offset.z;
+
+          file >> token;  // Consuming "}"
+
+        } else {
+          //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kOffset
+          //           << ", but found \"" << token << "\"";
+
+          return -1;
+        }
+
+        bvh_->add_joint(tmp_joint);
+      //##########################################################################
+      // End joint parsing
+      //##########################################################################
+      } else if (token == "}") {
+        joint->set_children(children);
+        parsed = joint;
+        return 0;
+      }
+
+      file >> token;
+    }
+
+    //LOG(ERROR) << "Cannot parse joint, unexpected end of file. Last token : "
+    //           << token;
+    return -1;
+  }
+
+  //##############################################################################
+  // Motion data parse function
+  //##############################################################################
+  /** Parses the MOTION section: "Frames:" count, "Frame Time:", then one row
+   *  of channel values per frame, distributed across all joints in hierarchy
+   *  order.
+   *  @return 0 on success, -1 on malformed input.
+   */
+  int BvhParser::parse_motion(std::ifstream& file) {
+
+    LOGI("ENTER");
+
+    std::string token;
+    file >> token;
+
+    int frames_num;
+
+    if (token == kFrames) {
+      file >> frames_num;
+      bvh_->set_num_frames(frames_num);
+      LOGD("Num of frames: %d", frames_num);
+    } else {
+      LOGE("Bad structure of .bvh file");
+
+      return -1;
+    }
+
+    file >> token;
+
+    double frame_time;
+
+    if (token == kFrame) {
+      file >> token;  // Consuming 'Time:'
+      file >> frame_time;
+      bvh_->set_frame_time(frame_time);
+      LOGD("Frame time: %f",frame_time);
+
+      float number;
+      for (int i = 0; i < frames_num; i++) {
+        for (auto joint : bvh_->joints()) {
+          std::vector <float> data;
+          // NOTE(review): extraction failures are not checked here; a short or
+          // non-numeric motion block silently yields stale/indeterminate data.
+          for (int j = 0; j < joint->num_channels(); j++) {
+            file >> number;
+            data.push_back(number);
+          }
+          LOGD("%s joint", joint->name().c_str());
+          joint->add_frame_motion_data(data);
+        }
+      }
+    } else {
+      LOGE("Bad structure of .bvh file.");
+      return -1;
+    }
+
+    LOGI("LEAVE");
+
+    return 0;
+  }
+
+  //##############################################################################
+  // Channels order parse function
+  //##############################################################################
+  /** Parses a CHANNELS declaration for @a joint: reads the channel count and
+   *  then that many channel names, mapping each to a Joint::Channel value.
+   *  @return 0 on success, -1 on an unrecognized channel name.
+   */
+  int BvhParser::parse_channel_order(std::ifstream& file,
+      std::shared_ptr <Joint> joint) {
+
+    LOGI("ENTER");
+
+    int num;
+    file >> num;
+    LOGD("Number of channels: %d",num);
+
+    std::vector <Joint::Channel> channels;
+    std::string token;
+
+    for (int i = 0; i < num; i++) {
+      file >> token;
+      if (token == kXpos)
+        channels.push_back(Joint::Channel::XPOSITION);
+      else if (token == kYpos)
+        channels.push_back(Joint::Channel::YPOSITION);
+      else if (token == kZpos)
+        channels.push_back(Joint::Channel::ZPOSITION);
+      else if (token == kXrot)
+        channels.push_back(Joint::Channel::XROTATION);
+      else if (token == kYrot)
+        channels.push_back(Joint::Channel::YROTATION);
+      else if (token == kZrot)
+        channels.push_back(Joint::Channel::ZROTATION);
+      else {
+        //LOG(ERROR) << "Not valid channel!";
+        return -1;
+      }
+    }
+
+    joint->set_channels_order(channels);
+
+    LOGI("LEAVE");
+
+    return 0;
+  }
+
+}
+} // namespace
diff --git a/mv_inference/inference/src/BvhUtils.cpp b/mv_inference/inference/src/BvhUtils.cpp
new file mode 100644 (file)
index 0000000..ba11a91
--- /dev/null
@@ -0,0 +1,72 @@
+/**
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BvhUtils.h"
+#include <cmath>
+
+#define DegreeToRadian(degree) ((degree) * (M_PI/180.f))
+
+namespace mediavision
+{
+namespace inference
+{
+  /** Builds a 4x4 single-axis rotation matrix (CV_32F) for @a angle degrees
+   *  around @a axis. Near-zero sine/cosine values are flushed to +0.0f so the
+   *  matrix never contains -0.0f entries.
+   *  NOTE(review): std::numeric_limits needs <limits>, which is not included
+   *  directly here — presumably reached transitively; confirm.
+   */
+  cv::Mat rotation_matrix(float angle, Axis axis) {
+    cv::Mat matrix = cv::Mat::eye(4,4,CV_32F);
+
+    float rangle = DegreeToRadian(angle);
+
+    // Flush near-zero results to +0.0f so -0.0f never appears in the matrix.
+    float sin_a = sin(rangle);
+    if (fabs(sin_a) < std::numeric_limits<float>::epsilon())
+      sin_a = 0.0f;
+    float cos_a = cos(rangle);
+    if (fabs(cos_a) < std::numeric_limits<float>::epsilon())
+      cos_a = 0.0f;
+    // Negated sine, guarded again so (-1.0f) * 0.0f cannot produce -0.0f.
+    float msin_a = fabs(sin_a) < std::numeric_limits<float>::epsilon() ?
+        0.0f : (-1.0f) * sin_a;
+
+    if (axis == Axis::X) {
+        matrix.at<float>(1,1) = cos_a;
+        matrix.at<float>(2,1) = sin_a;
+        matrix.at<float>(1,2) = msin_a;
+        matrix.at<float>(2,2) = cos_a;
+    } else if (axis == Axis::Y) {
+      matrix.at<float>(0,0) = cos_a;
+      matrix.at<float>(2,0) = msin_a;
+      matrix.at<float>(0,2) = sin_a;
+      matrix.at<float>(2,2) = cos_a;
+    } else {
+      matrix.at<float>(0,0) = cos_a;
+      matrix.at<float>(1,0) = sin_a;
+      matrix.at<float>(0,1) = msin_a;
+      matrix.at<float>(1,1) = cos_a;
+    }
+
+    return matrix;
+  }
+
+  /** Rotates matrix
+   *  @param  matrix  The matrix to be rotated
+   *  @param  angle   The rotation angle in degrees
+   *  @param  axis    The rotation axis
+   *  @return  The rotated matrix (matrix post-multiplied by the axis rotation)
+   */
+  cv::Mat rotate(cv::Mat matrix, float angle, Axis axis) {
+    return matrix * rotation_matrix(angle, axis);
+  }
+}
+}
\ No newline at end of file
index 84d2599..d40dafc 100644 (file)
@@ -78,7 +78,8 @@ namespace inference
                        mSourceSize(cv::Size()),
                        mInputBuffer(cv::Mat()),
                        engine_config(),
-                       mBackend()
+                       mBackend(),
+                       mPoseResult(NULL)
        {
                LOGI("ENTER");
 
@@ -136,6 +137,11 @@ namespace inference
                                        mOutputLayerProperty.tensor_infos);
                }
 
+               if (mPoseResult) {
+                       delete [] mPoseResult->landmarks;
+                       delete mPoseResult;
+               }
+
                mModelFormats.clear();
 
                // Release backend engine.
@@ -458,7 +464,7 @@ namespace inference
                        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
                }
 
-               LOGI("Before convering target types : %d", targetType);
+               LOGI("Before converting target types : %d", targetType);
 
                unsigned int new_type = MV_INFERENCE_TARGET_DEVICE_NONE;
 
@@ -487,7 +493,7 @@ namespace inference
                        break;
                }
 
-               LOGI("After convering target types : %d", new_type);
+               LOGI("After converting target types : %d", new_type);
 
                mConfig.mTargetTypes = new_type;
 
@@ -1163,7 +1169,7 @@ namespace inference
                        // provided. In the case, each backend should provide the number of results manually.
                        // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are
                        // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7
-                       // indicats the image id. But it is useless if a batch mode isn't supported.
+                       // indicates the image id. But it is useless if a batch mode isn't supported.
                        // So, use the 1st of 7.
 
                        number_of_detections = static_cast<int>(
@@ -1252,7 +1258,7 @@ namespace inference
 
                // In case of object detection,
                // a model may apply post-process but others may not.
-               // Thus, those cases should be hanlded separately.
+               // Thus, those cases should be handled separately.
                std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
                LOGI("inferDimInfo size: %zu", outputData.dimInfo.size());
 
@@ -1271,7 +1277,7 @@ namespace inference
                        // provided. In the case, each backend should provide the number of results manually.
                        // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are
                        // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7
-                       // indicats the image id. But it is useless if a batch mode isn't supported.
+                       // indicates the image id. But it is useless if a batch mode isn't supported.
                        // So, use the 1st of 7.
 
                        number_of_detections = static_cast<int>(
@@ -1384,5 +1390,93 @@ namespace inference
                return MEDIA_VISION_ERROR_NONE;
        }
 
+       int Inference::GetPoseLandmarkDetectionResults(
+                       mv_inference_pose_result_h *detectionResults, int width, int height)
+       {
+               tensor_t outputData;
+
+               // Get inference result and contain it to outputData.
+               int ret = FillOutputResult(outputData);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                               LOGE("Fail to get output result.");
+                               return ret;
+               }
+
+               if (mPoseResult == NULL) {
+                       mPoseResult = new mv_inference_pose_s;
+                       if (mPoseResult == NULL) {
+                               LOGE("Fail to create result handle");
+                               return MEDIA_VISION_ERROR_INTERNAL;
+                       }
+                       mPoseResult->number_of_poses= 1;
+                       mPoseResult->landmarks = new mv_inference_landmark_s[POSE_MAX_LANDMARKS];
+                       mPoseResult->number_of_landmarks_per_pose = POSE_MAX_LANDMARKS;
+                       for (int index = 0; index < POSE_MAX_LANDMARKS; ++index) {
+                               mPoseResult->landmarks[index].isAvailable = false;
+                               mPoseResult->landmarks[index].point.x = -1;
+                               mPoseResult->landmarks[index].point.y = -1;
+                               mPoseResult->landmarks[index].label = NULL;
+                               mPoseResult->landmarks[index].score = -1.0f;
+                       }
+               }
+
+               std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
+               std::vector<void *> inferResults(outputData.data.begin(),
+                                                                               outputData.data.end());
+
+               long number_of_poses = inferDimInfo[0][3];
+               float *tmp = static_cast<float *>(inferResults[0]);
+               cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]);
+
+               cv::Point loc;
+               cv::Point2f loc2f;
+               double score;
+               cv::Mat blurredHeatMap;
+
+               cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]),
+                                                                                               CV_32FC(inferDimInfo[0][3]), (void *) tmp);
+
+               cv::Mat multiChannels[inferDimInfo[0][3]];
+               split(reShapeTest, multiChannels);
+
+               float ratioX = static_cast<float>(inferDimInfo[0][2]);
+               float ratioY = static_cast<float>(inferDimInfo[0][1]);
+
+               int poseIndex = 0;
+               // return error if there is no mUserListName but the number_of_landmark
+               // is not 16
+               for (int index = 0; index < number_of_poses; index++) {
+                               poseIndex = index;
+                               if (!mUserListName.empty()) {
+                                       poseIndex = std::stoi(mUserListName[index]) - 1;
+                                       if (poseIndex < 0) {
+                                               continue;
+                                       }
+                               }
+
+                               cv::Mat heatMap = multiChannels[index];
+
+                               cv::GaussianBlur(heatMap, blurredHeatMap, cv::Size(), 5.0, 5.0);
+                               cv::minMaxLoc(heatMap, NULL, &score, NULL, &loc);
+
+                               loc2f.x = (static_cast<float>(loc.x) / ratioX);
+                               loc2f.y = (static_cast<float>(loc.y) / ratioY);
+                               LOGI("Idx[%2d] - mapping pose [%2d]: x[%.3f], y[%.3f], score[%.3f]",
+                                               index, poseIndex, loc2f.x, loc2f.y, score);
+
+                               mPoseResult->landmarks[poseIndex].isAvailable = true;
+                               mPoseResult->landmarks[poseIndex].point.x = static_cast<int>(static_cast<float>(width) * loc2f.x);
+                               mPoseResult->landmarks[poseIndex].point.y = static_cast<int>(static_cast<float>(height) * loc2f.y);
+                               mPoseResult->landmarks[poseIndex].score = score;
+                               mPoseResult->landmarks[poseIndex].label = NULL;
+               }
+               mPoseResult->number_of_landmarks_per_pose = POSE_MAX_LANDMARKS;
+               mPoseResult->number_of_poses = 1;
+
+               *detectionResults = static_cast<mv_inference_pose_result_h>(mPoseResult);
+
+               return MEDIA_VISION_ERROR_NONE;
+       }
+
 } /* Inference */
 } /* MediaVision */
diff --git a/mv_inference/inference/src/Posture.cpp b/mv_inference/inference/src/Posture.cpp
new file mode 100644 (file)
index 0000000..df21629
--- /dev/null
@@ -0,0 +1,356 @@
+/**
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "Posture.h"
+
+#include <ios>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <unistd.h>
+#include <cfloat>
+
+#define POSE_OFFSET_VALUE 20
+
+namespace mediavision
+{
+namespace inference
+{
+/**
+ * @brief Construct an empty posture.
+ *
+ * All POSE_MAX_LANDMARKS landmark slots start as "unavailable" with the
+ * (-1,-1) sentinel point, and all POSE_MAX_PARTS part slots start as
+ * "unavailable" with empty direction-vector lists. They are filled by
+ * setPoseFromFile().
+ */
+Posture::Posture() :
+    mBvhParser(),
+    mBvh(),
+    mPose()
+{
+    LOGI("ENTER");
+
+    mMotionToPoseMap.clear();
+    mPose.assign(POSE_MAX_LANDMARKS, std::make_pair(false, cv::Point(-1,-1)));
+
+    mPoseParts.assign(POSE_MAX_PARTS, std::make_pair(false, std::vector<cv::Vec2f>()));
+    LOGI("LEAVE");
+}
+
+/**
+ * @brief Destroy the posture, releasing the landmark vector's storage.
+ *
+ * The swap-with-temporary idiom forces the vector capacity to be freed
+ * immediately rather than merely cleared.
+ */
+Posture::~Posture()
+{
+    LOGI("ENTER");
+
+    std::vector<std::pair<bool, cv::Point>>().swap(mPose);
+
+    LOGI("LEAVE");
+}
+
+
+/**
+ * @brief Convert a 16-landmark pose into per-body-part unit direction vectors.
+ *
+ * For every part selected in @p parts, the corresponding slot of
+ * @p posePart is marked available and filled with the direction vectors of
+ * the limb segments; a part whose landmarks are not all available is marked
+ * unavailable.
+ *
+ * NOTE(review): the landmark indices (0-2 head, 3-5 right arm, 6-8 left arm,
+ * 10-12 right leg, 13-15 left leg, 2/9/10/13 body) are assumed from this
+ * grouping; confirm against the pose label/mapping files.
+ *
+ * @param parts    bitmask of MV_INFERENCE_HUMAN_BODY_PART_* flags
+ * @param pose     16 (available, point) landmark pairs
+ * @param posePart [out] POSE_MAX_PARTS (available, vectors) pairs
+ * @return always MEDIA_VISION_ERROR_NONE
+ */
+int Posture::getParts(int parts,
+                    std::vector<std::pair<bool, cv::Point>>& pose,
+                    std::vector<std::pair<bool, std::vector<cv::Vec2f>>>& posePart)
+{
+    LOGI("ENTER");
+    // head
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_HEAD) {
+        LOGI("HEAD");
+        if (pose[0].first == false || pose[1].first == false || pose[2].first == false) {
+            posePart[0].first = false;
+        } else {
+            posePart[0].first = true;
+            posePart[0].second.push_back(getUnitVectors(pose[0].second, pose[1].second));
+            posePart[0].second.push_back(getUnitVectors(pose[1].second, pose[2].second));
+        }
+    }
+
+    // right arm
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT) {
+        LOGI("ARM-R");
+        if (pose[3].first == false || pose[4].first == false || pose[5].first == false) {
+            posePart[1].first = false;
+        } else {
+            posePart[1].first = true;
+            posePart[1].second.push_back(getUnitVectors(pose[3].second, pose[4].second));
+            posePart[1].second.push_back(getUnitVectors(pose[4].second, pose[5].second));
+        }
+    }
+
+    // left arm
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT) {
+        LOGI("ARM-L");
+        if (pose[6].first == false || pose[7].first == false || pose[8].first == false) {
+            posePart[2].first = false;
+        } else {
+            posePart[2].first = true;
+            posePart[2].second.push_back(getUnitVectors(pose[6].second, pose[7].second));
+            posePart[2].second.push_back(getUnitVectors(pose[7].second, pose[8].second));
+        }
+    }
+
+    // right leg
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT) {
+        LOGI("LEG-R");
+        if (pose[10].first == false || pose[11].first == false || pose[12].first == false) {
+            posePart[3].first = false;
+        } else {
+            posePart[3].first = true;
+            posePart[3].second.push_back(getUnitVectors(pose[10].second, pose[11].second));
+            posePart[3].second.push_back(getUnitVectors(pose[11].second, pose[12].second));
+        }
+    }
+
+    // left leg
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT) {
+        LOGI("LEG-L");
+        if (pose[13].first == false || pose[14].first == false || pose[15].first == false) {
+            posePart[4].first = false;
+        } else {
+            posePart[4].first = true;
+            posePart[4].second.push_back(getUnitVectors(pose[13].second, pose[14].second));
+            posePart[4].second.push_back(getUnitVectors(pose[14].second, pose[15].second));
+
+        }
+    }
+
+    // body
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_BODY) {
+        LOGI("BODY");
+        if (pose[2].first == false || pose[9].first == false ||
+            pose[10].first == false || pose[13].first == false) {
+            posePart[5].first = false;
+        } else {
+            posePart[5].first = true;
+            posePart[5].second.push_back(getUnitVectors(pose[2].second, pose[9].second));
+            posePart[5].second.push_back(getUnitVectors(pose[9].second, pose[10].second));
+            posePart[5].second.push_back(getUnitVectors(pose[9].second, pose[13].second));
+        }
+    }
+
+    LOGI("LEAVE");
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
+int Posture::setPoseFromFile(const std::string motionCaptureFilePath, const std::string motionMappingFilePath)
+{
+    LOGI("ENTER");
+
+    int ret = MEDIA_VISION_ERROR_NONE;
+
+    // parsing motion capture file
+    LOGD("%s", motionCaptureFilePath.c_str());
+    LOGD("%s", motionMappingFilePath.c_str());
+    ret = mBvhParser.parse(motionCaptureFilePath.c_str(), &mBvh);
+    LOGD("frames: %d",mBvh.num_frames());
+
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        LOGE("Fail to parse a file [%s]", motionCaptureFilePath.c_str());
+        return MEDIA_VISION_ERROR_INTERNAL;
+    }
+
+    mBvh.recalculate_joints_ltm();
+
+    LOGD("reading motion mapping....");
+    // read motion mapping file
+    std::ifstream fp(motionMappingFilePath.c_str());
+    if (!fp.is_open()) {
+        LOGE("Fail to open %s", motionMappingFilePath.c_str());
+        return MEDIA_VISION_ERROR_INVALID_PATH;
+    }
+
+    std::string line;
+    mMotionToPoseMap.clear();
+    while (!fp.eof()) {
+        std::getline(fp, line);
+        LOGD("%s", line.c_str());
+        std::istringstream lineStream(line);
+        std::string token;
+        std::vector<std::string> parsedString;
+        while(getline(lineStream, token, ',')) {
+            parsedString.push_back(token);
+        }
+        LOGD("name: %s, mapping index: %d", parsedString[0].c_str(), std::stoi(parsedString[1]));
+        mMotionToPoseMap.insert(make_pair(parsedString[0], std::stoi(parsedString[1])));
+    }
+
+    fp.close();
+    LOGD("mapping size is %zd", mMotionToPoseMap.size());
+
+    // convert bvh to pose
+    //convertBvhToPose();
+    //for (std::shared_ptr<Bvh::Joint>)
+
+    float pointX, pointY, minX, minY, maxX, maxY;
+    minX = minY = FLT_MAX;
+    maxX = maxY = FLT_MIN;
+    for (std::shared_ptr<Joint> joint : mBvh.joints()) {
+        std::map<std::string, int>::iterator it = mMotionToPoseMap.find(std::string(joint->name()));
+        if (it != mMotionToPoseMap.end()) {
+            pointX = joint->pos(0)[0];
+            pointY = joint->pos(0)[1];
+            if (pointX < minX)
+                minX = pointX;
+
+            if (pointY < minY)
+                minY = pointY;
+
+            if (pointX > maxX)
+                maxX = pointX;
+
+            if (pointY > maxY)
+                maxY = pointY;
+
+            mPose[it->second].first = true;
+            mPose[it->second].second = cv::Point(pointX, pointY);
+            LOGD("%d: (%f,%f)", it->second, pointX, pointY);
+        }
+    }
+
+    // add offset to make x > 0 and y > 0
+    int height = (int)maxY - (int)minY + POSE_OFFSET_VALUE;
+    for (std::vector<std::pair<bool, cv::Point>>::iterator iter = mPose.begin();
+        iter != mPose.end(); iter++) {
+        if (iter->first == false)
+            continue;
+
+        iter->second.x -= minX;
+        iter->second.y -= minY;
+
+        iter->second.x += POSE_OFFSET_VALUE;
+        iter->second.y += POSE_OFFSET_VALUE;
+
+        iter->second.y = height - iter->second.y;
+
+        LOGE("(%d, %d)", iter->second.x, iter->second.y);
+    }
+
+    ret = getParts((MV_INFERENCE_HUMAN_BODY_PART_HEAD |
+                    MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT |
+                    MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT |
+                    MV_INFERENCE_HUMAN_BODY_PART_BODY |
+                    MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT |
+                    MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT),
+                    mPose, mPoseParts);
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        LOGE("Fail to getPartse");
+        return ret;
+    }
+
+    LOGI("LEAVE");
+
+    return ret;
+}
+
+/**
+ * @brief Direction vector from point2 to point1, normalized by L1 norm.
+ *
+ * NOTE(review): normalization uses the L1 norm, so the result satisfies
+ * |x| + |y| == 1 rather than being a Euclidean unit vector; this matches
+ * how cosineSimilarity() consumes the vectors.
+ *
+ * @return the normalized direction, or (0,0) if the points coincide
+ */
+cv::Vec2f Posture::getUnitVectors(cv::Point point1, cv::Point point2)
+{
+    LOGI("ENTER");
+
+    cv::Vec2i vec(point1.x - point2.x, point1.y - point2.y);
+    double norm = cv::norm(vec, cv::NORM_L1);
+    // Guard against identical points: dividing by a zero norm would
+    // produce NaN/Inf components and poison every similarity score.
+    if (norm == 0.0) {
+        LOGE("Degenerate segment: both points are identical");
+        return cv::Vec2f(0.0f, 0.0f);
+    }
+
+    cv::Vec2f unitVec (vec[0]/norm, vec[1]/norm);
+
+    LOGI("LEAVE");
+
+    return unitVec;
+}
+
+/**
+ * @brief Sum of per-segment cosine similarities between two vector lists.
+ *
+ * For each of the @p size 2-D vectors this accumulates
+ * dot(v1,v2) / sqrt(|v1|^2 * |v2|^2); the caller divides by the segment
+ * count later, so the return value is a *sum*, not an average.
+ *
+ * NOTE(review): if either vector is all-zero (see getUnitVectors' degenerate
+ * case) the denominator is 0 and the term becomes NaN — confirm whether
+ * callers can ever pass zero vectors.
+ */
+float Posture::cosineSimilarity(std::vector<cv::Vec2f> vec1, std::vector<cv::Vec2f> vec2, int size)
+{
+    float numer = 0.0f;
+    float denom1 = 0.0f;
+    float denom2 = 0.0f;
+
+    float value = 0.0f;
+
+    for (int k = 0; k < size; ++k) {
+        numer = denom1 = denom2 = 0.0f;
+        for (int dim = 0; dim <2; ++dim) {
+            numer += (vec1[k][dim] * vec2[k][dim]);
+            denom1 += (vec1[k][dim] * vec1[k][dim]);
+            denom2 += (vec2[k][dim] * vec2[k][dim]);
+        }
+        LOGI("similarity: %f", numer / sqrt( denom1 * denom2));
+        value += numer / sqrt( denom1 * denom2);
+
+    }
+
+    return value;
+}
+
+float Posture::getSimilarity(int parts,
+                                       std::vector<std::pair<bool, std::vector<cv::Vec2f>>>& posePart,
+                                       std::vector<std::pair<bool, std::vector<cv::Vec2f>>>& actionPart)
+{
+    float score = 0.0f;
+    int bodyCount = 0;
+    std::vector<int> index;
+
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_HEAD) {
+        index.push_back(0);
+    }
+
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT) {
+        index.push_back(1);
+    }
+
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT) {
+        index.push_back(2);
+    }
+
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT) {
+        index.push_back(3);
+    }
+
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT) {
+        index.push_back(4);
+    }
+
+    if (parts & MV_INFERENCE_HUMAN_BODY_PART_BODY) {
+        index.push_back(5);
+    }
+
+    for (std::vector<int>::iterator it = index.begin(); it != index.end(); ++it) {
+        if (posePart[(*it)].first && actionPart[(*it)].first &&
+            (posePart[(*it)].second.size() == actionPart[(*it)].second.size())) {
+            score += cosineSimilarity(posePart[(*it)].second, actionPart[(*it)].second, posePart[(*it)].second.size());
+
+            bodyCount += posePart[(*it)].second.size();
+            LOGI("body[%d], score[%f], count[%d]", (*it), score, bodyCount);
+        }
+    }
+
+    score /= (float)bodyCount;
+    LOGD("score: %1.3f", score);
+
+    return score;
+
+}
+
+int Posture::compare(int parts, std::vector<std::pair<bool, cv::Point>> action, float* score)
+{
+    LOGI("ENTER");
+
+    std::vector<std::pair<bool, std::vector<cv::Vec2f>>> actionParts;
+    actionParts.assign(6, std::make_pair(false, std::vector<cv::Vec2f>()));
+    int ret = getParts(parts, action, actionParts);
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        LOGE("Fail to getPartse");
+        return ret;
+    }
+
+    *score = getSimilarity(parts, mPoseParts, actionParts);
+
+    LOGI("LEAVE");
+
+    return MEDIA_VISION_ERROR_NONE;
+}
+
+}
+} // namespace
index 4f2d02c..7f7eb93 100644 (file)
@@ -315,4 +315,175 @@ int mv_inference_facial_landmark_detect(
        return ret;
 
 #endif
-}
\ No newline at end of file
+}
+
+/* Public entry point for pose landmark detection: validate arguments and
+ * feature support, then delegate to the open (C++) implementation. */
+int mv_inference_pose_landmark_detect(
+               mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
+               mv_inference_pose_landmark_detected_cb detected_cb, void *user_data)
+{
+       /* Use the inference feature check here: the original called the *face*
+        * check (__mv_inference_face_check_system_info_feature_supported),
+        * inconsistent with every other pose API in this file. */
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(source);
+       MEDIA_VISION_INSTANCE_CHECK(infer);
+       MEDIA_VISION_NULL_ARG_CHECK(detected_cb);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       ret = mv_inference_pose_landmark_detect_open(source, infer, roi,
+                                                                                                detected_cb, user_data);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: checks feature support and arguments, then delegates to
+ * mv_inference_pose_get_number_of_poses_open(). */
+int mv_inference_pose_get_number_of_poses(mv_inference_pose_result_h result, int *number_of_poses)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(result);
+
+       MEDIA_VISION_NULL_ARG_CHECK(number_of_poses);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       int value = 0;
+       ret = mv_inference_pose_get_number_of_poses_open(result, &value);
+
+       *number_of_poses = value;
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+
+/* Public wrapper: checks feature support and arguments, then delegates to
+ * mv_inference_pose_get_number_of_landmarks_open(). */
+int mv_inference_pose_get_number_of_landmarks(mv_inference_pose_result_h result, int *number_of_landmarks)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(result);
+
+       MEDIA_VISION_NULL_ARG_CHECK(number_of_landmarks);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       int value = 0;
+       ret = mv_inference_pose_get_number_of_landmarks_open(result, &value);
+
+       *number_of_landmarks = value;
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: validates handles, output pointers and non-negative
+ * indices, then delegates to mv_inference_pose_get_landmark_open() (which
+ * also range-checks the indices against the result). */
+int mv_inference_pose_get_landmark(mv_inference_pose_result_h result,
+               int pose_index, int part_index, mv_point_s *location, float *score)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(result);
+
+       MEDIA_VISION_NULL_ARG_CHECK(location);
+       MEDIA_VISION_NULL_ARG_CHECK(score);
+
+       if (pose_index < 0 || part_index < 0)
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       mv_point_s value1 = {0,0};
+       float value2 = 0.0f;
+
+       ret = mv_inference_pose_get_landmark_open(result, pose_index, part_index, &value1, &value2);
+
+       *location = value1;
+       *score = value2;
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+
+/* Public wrapper: checks feature support and the out-pointer, then
+ * delegates handle creation to mv_pose_create_open(). */
+int mv_pose_create(mv_pose_h *pose)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_NULL_ARG_CHECK(pose);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       ret = mv_pose_create_open(pose);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: checks feature support and the handle, then delegates
+ * destruction to mv_pose_destroy_open(). */
+int mv_pose_destroy(mv_pose_h pose)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(pose);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       ret = mv_pose_destroy_open(pose);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: checks feature support, handle and both path arguments,
+ * then delegates to mv_pose_set_from_file_open(). */
+int mv_pose_set_from_file(mv_pose_h pose, const char *motion_capture_file_path, const char *motion_mapping_file_path)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(pose);
+       MEDIA_VISION_NULL_ARG_CHECK(motion_capture_file_path);
+       MEDIA_VISION_NULL_ARG_CHECK(motion_mapping_file_path);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       ret = mv_pose_set_from_file_open(pose, motion_capture_file_path, motion_mapping_file_path);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
+
+/* Public wrapper: checks feature support, both handles and the score
+ * out-pointer, then delegates to mv_pose_compare_open(). */
+int mv_pose_compare(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score)
+{
+       MEDIA_VISION_SUPPORT_CHECK(
+                       __mv_inference_check_system_info_feature_supported());
+       MEDIA_VISION_INSTANCE_CHECK(pose);
+       MEDIA_VISION_INSTANCE_CHECK(action);
+       MEDIA_VISION_NULL_ARG_CHECK(score);
+
+       MEDIA_VISION_FUNCTION_ENTER();
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       ret = mv_pose_compare_open(pose, action, parts, score);
+
+       MEDIA_VISION_FUNCTION_LEAVE();
+
+       return ret;
+}
index 09f9544..cb12fc0 100644 (file)
@@ -18,6 +18,7 @@
 #include "mv_inference_open.h"
 
 #include "Inference.h"
+#include "Posture.h"
 
 #include <new>
 #include <unistd.h>
@@ -784,4 +785,199 @@ int mv_inference_facial_landmark_detect_open(
        detected_cb(source, numberOfLandmarks, locations.data(), user_data);
 
        return ret;
-}
\ No newline at end of file
+}
+
+int mv_inference_pose_landmark_detect_open(
+               mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
+               mv_inference_pose_landmark_detected_cb detected_cb, void *user_data)
+{
+       Inference *pInfer = static_cast<Inference *>(infer);
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
+
+       unsigned int width, height;
+       ret = mv_source_get_width(source, &width);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to get width");
+               return ret;
+       }
+
+       ret = mv_source_get_height(source, &height);
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to get height");
+               return ret;
+       }
+
+       sources.push_back(source);
+
+       if (roi != NULL)
+               rects.push_back(*roi);
+
+       ret = pInfer->Run(sources, rects);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to run inference");
+               return ret;
+       }
+
+       mv_inference_pose_result_h result = NULL;
+       ret = pInfer->GetPoseLandmarkDetectionResults(
+                       &result, width, height);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to get inference results");
+               return ret;
+       }
+
+       mv_inference_pose_s *tmp = static_cast<mv_inference_pose_s *>(result);
+       for (int n = 0; n < POSE_MAX_LANDMARKS; ++n) {
+               LOGI("PoseIdx[%2d]: x[%d], y[%d], score[%.3f]", n,
+                                                                       tmp->landmarks[n].point.x,
+                                                                       tmp->landmarks[n].point.y,
+                                                                       tmp->landmarks[n].score);
+       }
+
+       detected_cb(source, result, -1, user_data);
+
+       return ret;
+}
+
+/* Read the pose count out of the result handle. Argument validation is
+ * done by the public wrapper. */
+int mv_inference_pose_get_number_of_poses_open(
+               mv_inference_pose_result_h result,
+               int *number_of_poses)
+{
+       mv_inference_pose_s *handle = static_cast<mv_inference_pose_s *>(result);
+
+       *number_of_poses = handle->number_of_poses;
+
+       LOGI("%d", *number_of_poses);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/* Read the per-pose landmark count out of the result handle. Argument
+ * validation is done by the public wrapper. */
+int mv_inference_pose_get_number_of_landmarks_open(
+               mv_inference_pose_result_h result,
+               int *number_of_landmarks)
+{
+       mv_inference_pose_s *handle = static_cast<mv_inference_pose_s *>(result);
+
+       *number_of_landmarks = handle->number_of_landmarks_per_pose;
+
+       LOGI("%d", *number_of_landmarks);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/* Fetch one landmark's location and score from the result handle, after
+ * range-checking both indices.
+ *
+ * NOTE(review): pose_index is validated but not used when indexing
+ * landmarks[]; this is only correct while number_of_poses is always 1
+ * (as GetPoseLandmarkDetectionResults guarantees today) — revisit for
+ * multi-pose support. */
+int mv_inference_pose_get_landmark_open(
+               mv_inference_pose_result_h result,
+               int pose_index,
+               int part_index,
+               mv_point_s *location,
+               float *score)
+{
+       mv_inference_pose_s *handle = static_cast<mv_inference_pose_s *>(result);
+
+       if (pose_index < 0 || pose_index >= handle->number_of_poses)
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+
+       if (part_index < 0 || part_index >= handle->number_of_landmarks_per_pose)
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+
+       *location = handle->landmarks[part_index].point;
+
+       *score = handle->landmarks[part_index].score;
+
+       LOGI("[%d]:(%dx%d) - %.4f", pose_index, location->x, location->y, *score);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_pose_create_open(mv_pose_h *pose)
+{
+       if (pose == NULL) {
+               LOGE("Handle can't be created because handle pointer is NULL");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       (*pose) = static_cast<mv_pose_h>(new (std::nothrow) Posture);
+
+       if (*pose == NULL) {
+               LOGE("Failed to create pose handle");
+               return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+       }
+
+       LOGD("Inference handle [%p] has been created", *pose);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/* Destroy a Posture instance created by mv_pose_create_open(). */
+int mv_pose_destroy_open(mv_pose_h pose)
+{
+       if (!pose) {
+               /* Fix: the message said "Hand" instead of "Pose". */
+               LOGE("Pose can't be destroyed because handle is NULL");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       LOGD("Destroy pose handle [%p]", pose);
+       delete static_cast<Posture *>(pose);
+       LOGD("Pose handle has been destroyed");
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+int mv_pose_set_from_file_open(mv_pose_h pose,
+               const char *motionCaptureFilePath,
+               const char *motionMappingFilePath)
+{
+       Posture *pPose = static_cast<Posture *>(pose);
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       // check file
+       if (access(motionCaptureFilePath, F_OK) || access(motionMappingFilePath, F_OK)) {
+        LOGE("Invalid Motion Capture file path [%s]", motionCaptureFilePath);
+               LOGE("Invalid Motion Mapping file path [%s]", motionMappingFilePath);
+
+        return MEDIA_VISION_ERROR_INVALID_PATH;
+    }
+
+       ret = pPose->setPoseFromFile(std::string(motionCaptureFilePath),
+                                                               std::string(motionMappingFilePath));
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to setPoseFromFile");
+               return ret;
+       }
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
+/* Compare the reference posture stored in the pose handle against a
+ * detected pose result, writing the averaged similarity to *score.
+ *
+ * NOTE(review): availability is inferred from the (-1,-1) sentinel that
+ * GetPoseLandmarkDetectionResults writes for undetected landmarks rather
+ * than from the isAvailable flag — confirm the two can never disagree. */
+int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score)
+{
+       Posture *pPose = static_cast<Posture *>(pose);
+
+       int ret = MEDIA_VISION_ERROR_NONE;
+
+       std::vector<std::pair<bool, cv::Point>> actionParts;
+
+       mv_inference_pose_s *pAction = static_cast<mv_inference_pose_s *>(action);
+       for (int k = 0; k < POSE_MAX_LANDMARKS; ++k) {
+               if (pAction->landmarks[k].point.x == -1 || pAction->landmarks[k].point.y == -1) {
+                       actionParts.push_back(std::make_pair(false, cv::Point(-1,-1)));
+                       continue;
+               }
+
+               actionParts.push_back(std::make_pair(true, cv::Point(pAction->landmarks[k].point.x,
+                                                                                                                       pAction->landmarks[k].point.y)));
+
+       }
+
+       ret = pPose->compare(parts, actionParts, score);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               LOGE("Fail to compare");
+               return ret;
+       }
+
+       LOGD("score: %1.4f", *score);
+
+       return MEDIA_VISION_ERROR_NONE;
+}
index 49104f7..be1b209 100644 (file)
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.5.3
+Version:     0.6.0
 Release:     1
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
index 02c666e..9b21c45 100644 (file)
@@ -59,7 +59,7 @@
 #define IC_OPENCV_CONFIG_CAFFE_PATH \
        "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt"
 
-//Obeject Detection
+//Object Detection
 #define OD_LABEL_PATH \
        "/usr/share/capi-media-vision/models/OD/tflite/od_label.txt"
 #define OD_TFLITE_WEIGHT_PATH \
@@ -81,7 +81,7 @@
 #define FD_OPENCV_CONFIG_CAFFE_PATH \
        "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt"
 
-//Facila LandmarkDetection
+//Facial LandmarkDetection
 #define FLD_TFLITE_WEIGHT_PATH \
        "/usr/share/capi-media-vision/models/FLD/tflite/fld_tflite_model1.tflite"
 
 #define FLD_OPENCV_CONFIG_CAFFE_PATH \
        "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt"
 
+//Pose Detection
+#define PLD_TFLITE_WEIGHT_PATH \
+       "/usr/share/capi-media-vision/models/PLD/tflite/pld-tflite-001.tflite"
+#define PLD_POSE_LABEL_PATH \
+       "/usr/share/capi-media-vision/models/PLD/tflite/pose-label.txt"
+#define PLD_MOTION_CAPTURE_FILE_PATH \
+       "/usr/share/capi-media-vision/models/PLD/mocap/example.bvh"
+#define PLD_MOTION_CAPTURE_MAPPING_FILE_PATH \
+       "/usr/share/capi-media-vision/models/PLD/mocap/example-mocap-mapping.txt"
+
 /******
  * Public model:
  *  IC: mobilenet caffe, tf?
  *  OD: mobilenetv1-ssd caffe, tf?
  *  FD: caffe, tf
  *  FLD: caffe, tf
- *  PE: cpm model, tf and tflite.
+ *  PD: cpm model, tf and tflite.
  *      link : https://github.com/edvardHua/PoseEstimationForMobile/tree/master/release/cpm_model
- *      Ps. media vision supports cpm and hourglass models for pose estimaion for now.
+ *      Ps. media vision supports cpm and hourglass models for pose estimation for now.
  */
 
 #define NANO_PER_SEC ((__clock_t) 1000000000)
@@ -162,14 +172,57 @@ void _facial_landmark_detected_cb(mv_source_h source,
        }
 }
 
-void _pose_estimation_detected_cb(mv_source_h source,
-                                                                 const int number_of_pose_estimation,
-                                                                 const mv_point_s *locations, void *user_data)
+void _pose_landmark_detected_cb(mv_source_h source,
+                                                                 mv_inference_pose_result_h pose, int label, void *user_data)
 {
-       printf("In callback, %d pose estimation\n", number_of_pose_estimation);
-       for (int n = 0; n < number_of_pose_estimation; n++) {
-               printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y);
+       int cb_number_of_poses = 0;
+       int cb_number_of_landmarks = 0;
+       mv_inference_pose_get_number_of_poses(pose, &cb_number_of_poses);
+       mv_inference_pose_get_number_of_landmarks(pose, &cb_number_of_landmarks);
+       printf("%d pose with %d landmarks\n",cb_number_of_poses, cb_number_of_landmarks);
+
+       mv_point_s point;
+       float score;
+       for (int k = 0; k < cb_number_of_poses; ++k)
+       for (int n = 0; n < cb_number_of_landmarks; n++) {
+               mv_inference_pose_get_landmark(pose, k, n, &point, &score);
+               printf("%d-%d: x[%d], y[%d] with %.4f\n", k, n, point.x, point.y, score);
+       }
+
+       mv_pose_h poser;
+       float poseScore;
+       int ret = mv_pose_create(&poser);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               printf("Fail to create mv_pose_h");
+               return;
+       }
+
+       ret = mv_pose_set_from_file(poser,
+                                               PLD_MOTION_CAPTURE_FILE_PATH,
+                                               PLD_MOTION_CAPTURE_MAPPING_FILE_PATH);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               mv_pose_destroy(poser);
+               printf("Fail to mv_pose_set_from_file");
+               return;
+       }
+
+       ret = mv_pose_compare(poser, pose,
+                       (MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT | MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT),
+                       &poseScore);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               mv_pose_destroy(poser);
+               printf("Fail to mv_pose_compare");
+               return;
        }
+
+       printf("[Leg]:Left&Right - poseScore:[%1.4f]", poseScore);
+
+       ret = mv_pose_destroy(poser);
+       if (ret != MEDIA_VISION_ERROR_NONE) {
+               printf("Fail to destroy mv_pose_h but keep going..");
+       }
+
+       return;
 }
 
 void _image_classified_cb(mv_source_h source, const int number_of_classes,
@@ -1016,7 +1069,7 @@ int perform_image_classification()
 
                        struct timespec diffspec = diff(s_tspec, e_tspec);
                        unsigned long timeDiff = gettotalmillisec(diffspec);
-                       printf("elased time : %lu(ms)\n", timeDiff);
+                       printf("elapsed time : %lu(ms)\n", timeDiff);
 
                } break;
                case 9: {
@@ -1449,7 +1502,7 @@ int perform_object_detection()
 
                        struct timespec diffspec = diff(s_tspec, e_tspec);
                        unsigned long timeDiff = gettotalmillisec(diffspec);
-                       printf("elased time : %lu(ms)\n", timeDiff);
+                       printf("elapsed time : %lu(ms)\n", timeDiff);
 
                } break;
                case 7: {
@@ -1864,7 +1917,7 @@ int perform_face_detection()
 
                        struct timespec diffspec = diff(s_tspec, e_tspec);
                        unsigned long timeDiff = gettotalmillisec(diffspec);
-                       printf("elased time : %lu(ms)\n", timeDiff);
+                       printf("elapsed time : %lu(ms)\n", timeDiff);
                } break;
                case 7: {
                        //perform destroy
@@ -2194,7 +2247,7 @@ int perform_facial_landmark_detection()
 
                        struct timespec diffspec = diff(s_tspec, e_tspec);
                        unsigned long timeDiff = gettotalmillisec(diffspec);
-                       printf("elased time : %lu(ms)\n", timeDiff);
+                       printf("elapsed time : %lu(ms)\n", timeDiff);
                } break;
                case 6: {
                        //perform destroy
@@ -2278,14 +2331,279 @@ int perform_facial_landmark_detection()
        return MEDIA_VISION_ERROR_NONE;
 }
 
+int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg)
+{
+       int err = MEDIA_VISION_ERROR_NONE;
+
+       mv_engine_config_h handle = NULL;
+       err = mv_create_engine_config(&handle);
+       if (err != MEDIA_VISION_ERROR_NONE) {
+               printf("Fail to create engine configuration handle.\n");
+               if (handle) {
+                       int err2 = mv_destroy_engine_config(handle);
+                       if (err2 != MEDIA_VISION_ERROR_NONE) {
+                               printf("Fail to destroy engine cofniguration.\n");
+                       }
+               }
+               return err;
+       }
+
+       char *inputNodeName = "image";
+       char *outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" };
+
+       mv_engine_config_set_string_attribute(
+                       handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, PLD_TFLITE_WEIGHT_PATH);
+
+       mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE,
+                                                                          MV_INFERENCE_DATA_FLOAT32);
+
+       mv_engine_config_set_string_attribute(
+                       handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH);
+
+       mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE,
+                                                                                 0.0);
+
+       mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE,
+                                                                                 1.0);
+
+       mv_engine_config_set_double_attribute(
+                       handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+
+       mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE,
+                                                                          MV_INFERENCE_BACKEND_TFLITE);
+
+       mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE,
+                                                                          MV_INFERENCE_TARGET_CPU);
+
+       mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                                                                          192);
+
+       mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                                                                          192);
+
+       mv_engine_config_set_int_attribute(handle,
+                                                                          MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+
+       mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME,
+                                                                                 inputNodeName);
+
+       mv_engine_config_set_array_string_attribute(
+                       handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+       *engine_cfg = handle;
+       return err;
+}
+
+
+int perform_pose_landmark_detection()
+{
+       int err = MEDIA_VISION_ERROR_NONE;
+
+       int sel_opt = 0;
+       const int options[] = { 1, 2, 3, 4, 5 };
+       const *names[] = { "Configuration",
+                                               "TFLITE(CPU) + CPM",
+                                               "Prepare",
+                                               "Run",
+                                               "Back" };
+
+       mv_engine_config_h engine_cfg = NULL;
+       mv_inference_h infer = NULL;
+       mv_source_h mvSource = NULL;
+
+       while (sel_opt == 0) {
+               sel_opt = show_menu("Select Action:", options, names,
+                                                       ARRAY_SIZE(options));
+               switch (sel_opt) {
+               case 1: {
+                       //perform configuration
+                       if (engine_cfg) {
+                               int err2 = mv_destroy_engine_config(engine_cfg);
+                               if (err2 != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                               engine_cfg = NULL;
+                       }
+
+                       err = perform_configuration(&engine_cfg);
+               } break;
+               case 2: {
+                       //perform SRID TweakCNN config
+                       if (engine_cfg) {
+                               int err2 = mv_destroy_engine_config(engine_cfg);
+                               if (err2 != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                               engine_cfg = NULL;
+                       }
+                       err = perform_armnn_cpm_config(&engine_cfg);
+               } break;
+               case 3: {
+                       // create - configure - prepare
+                       if (infer) {
+                               int err2 = mv_inference_destroy(infer);
+                               if (err2 != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy inference handle [err:%i]\n", err2);
+                               infer = NULL;
+                       }
+
+                       // inference
+                       // create handle
+                       err = mv_inference_create(&infer);
+                       if (err != MEDIA_VISION_ERROR_NONE) {
+                               printf("Fail to create inference handle [err:%i]\n", err);
+                               break;
+                       }
+
+                       //configure
+                       err = mv_inference_configure(infer, engine_cfg);
+                       if (err != MEDIA_VISION_ERROR_NONE) {
+                               printf("Fail to configure inference handle [err:%i]\n", err);
+                               break;
+                       }
+
+                       //prepare
+                       err = mv_inference_prepare(infer);
+                       if (err != MEDIA_VISION_ERROR_NONE) {
+                               printf("Fail to prepare inference handle");
+                               break;
+                       }
+               } break;
+               case 4: {
+                       if (mvSource) {
+                               int err2 = mv_destroy_source(mvSource);
+                               if (err2 != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy mvSource\n");
+                               mvSource = NULL;
+                       }
+
+                       char *in_file_name = NULL;
+                       /* Load media source */
+                       while (input_string("Input file name to be inferred:", 1024,
+                                                               &(in_file_name)) == -1)
+                               printf("Incorrect input! Try again.\n");
+
+                       err = mv_create_source(&mvSource);
+                       if (err != MEDIA_VISION_ERROR_NONE) {
+                               printf("Fail to create mvSource.\n");
+                               free(in_file_name);
+                               break;
+                       }
+
+                       err = load_mv_source_from_file(in_file_name, mvSource);
+                       if (err != MEDIA_VISION_ERROR_NONE) {
+                               int err2 = mv_destroy_source(mvSource);
+                               if (err2 != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy mvSource.\n", err2);
+                               mvSource = NULL;
+                               free(in_file_name);
+                               break;
+                       }
+                       free(in_file_name);
+
+                       struct timespec s_tspec;
+                       struct timespec e_tspec;
+
+                       clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+                       // Object Detect
+                       err = mv_inference_pose_landmark_detect(
+                                       mvSource, infer, NULL, _pose_landmark_detected_cb, NULL);
+
+                       clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+                       struct timespec diffspec = diff(s_tspec, e_tspec);
+                       unsigned long timeDiff = gettotalmillisec(diffspec);
+                       printf("elapsed time : %lu(ms)\n", timeDiff);
+               } break;
+               case 5: {
+                       //perform destroy
+                       if (engine_cfg) {
+                               err = mv_destroy_engine_config(engine_cfg);
+                               if (err != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy engine_cfg [err:%i]\n", err);
+                               engine_cfg = NULL;
+                       }
+
+                       if (infer) {
+                               err = mv_inference_destroy(infer);
+                               if (err != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy inference handle [err:%i]\n", err);
+                               infer = NULL;
+                       }
+
+                       if (mvSource) {
+                               err = mv_destroy_source(mvSource);
+                               if (err != MEDIA_VISION_ERROR_NONE)
+                                       printf("Fail to destroy mvSource [err:%i]\n", err);
+                               mvSource = NULL;
+                       }
+               } break;
+               default:
+                       printf("Invalid option.\n");
+                       sel_opt = 0;
+                       continue;
+               }
+
+               int do_another = 0;
+               if (err != MEDIA_VISION_ERROR_NONE) {
+                       printf("ERROR: Action is finished with error code: %i\n");
+               }
+
+               sel_opt = 0;
+               const int options_last[] = { 1, 2 };
+               const char *names_last[] = { "Yes", "No" };
+
+               while (sel_opt == 0) {
+                       sel_opt = show_menu(
+                                       "Run Facial Landmark Detection again?:", options_last,
+                                       names_last, ARRAY_SIZE(options_last));
+                       switch (sel_opt) {
+                       case 1:
+                               do_another = 1;
+                               break;
+                       case 2:
+                               do_another = 0;
+                               break;
+                       default:
+                               printf("Invalid option.\n");
+                               sel_opt = 0;
+                       }
+               }
+
+               sel_opt = (do_another == 1) ? 0 : 1;
+       }
+
+       if (engine_cfg) {
+               err = mv_destroy_engine_config(engine_cfg);
+               if (err != MEDIA_VISION_ERROR_NONE)
+                       printf("Fail to destroy engine_cfg [err:%i]\n", err);
+               engine_cfg = NULL;
+       }
+
+       if (infer) {
+               err = mv_inference_destroy(infer);
+               if (err != MEDIA_VISION_ERROR_NONE)
+                       printf("Fail to destroy inference handle [err:%i]\n", err);
+               infer = NULL;
+       }
+
+       if (mvSource) {
+               err = mv_destroy_source(mvSource);
+               if (err != MEDIA_VISION_ERROR_NONE)
+                       printf("Fail to destroy mvSource [err:%i]\n", err);
+               mvSource = NULL;
+       }
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
 int main()
 {
        int sel_opt = 0;
 
-       const int options[] = { 1, 2, 3, 4, 5 };
+       const int options[] = { 1, 2, 3, 4, 5, 6 };
        const char *names[] = { "Image Classification", "Object Detection",
                                                         "Face Detection",               "Facial LandmarkDetection",
-                                                        "Exit" };
+                                                        "Pose Detection", "Exit" };
 
        int err = MEDIA_VISION_ERROR_NONE;
        while (sel_opt == 0) {
@@ -2315,9 +2633,14 @@ int main()
                        if (err != MEDIA_VISION_ERROR_NONE) {
                                printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", err);
                        }
-                       break;
-               }
+               } break;
                case 5: {
+                       err = perform_pose_landmark_detection();
+                       if (err != MEDIA_VISION_ERROR_NONE) {
+                               printf("Fail to perform pose landmark detection");
+                       }
+               } break;
+               case 6: {
                        printf("Exit");
                } break;
                default: