const mv_point_s *locations,
void *user_data);
+/**
+ * @brief Called when hand detection results are available.
+ *
+ * @since_tizen 6.0
+ * @remarks The @a confidences and @a locations arrays contain one entry per
+ *          detected hand. They are owned by the caller of the callback, so
+ *          copy the values if they are needed after the callback returns.
+ *
+ * @param[in] source           The handle to the source of the media
+ * @param[in] number_of_hands  The number of detected hands
+ * @param[in] confidences      The confidence value of each detected hand
+ * @param[in] locations        The bounding box of each detected hand
+ * @param[in] user_data        The user data passed from the detect request
+ */
+typedef void (*mv_inference_hand_detected_cb)(
+		mv_source_h source,
+		int number_of_hands,
+		const float *confidences,
+		const mv_rectangle_s *locations,
+		void *user_data);
+
/**
* @brief Performs facial landmarks detection on the @a source.
* @details Use this function to launch facial landmark detection.
mv_inference_pose_estimation_detected_cb detected_cb,
void *user_data);
+/**
+ * @brief Performs hand detection on the @a source.
+ * @details Use this function to launch hand detection.
+ *          Each time mv_inference_hand_detect() is
+ *          called, @a detected_cb will receive a list of hands and their locations
+ *          in the media source.
+ *
+ * @since_tizen 6.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] source         The handle to the source of the media
+ * @param[in] infer          The handle to the inference
+ * @param[in] detected_cb    The callback which will be called for
+ *                           detecting hands on media source.
+ *                           This callback will receive the detection results.
+ * @param[in] user_data      The user data passed from the code where
+ *                           mv_inference_hand_detect() is invoked. This data will
+ *                           be accessible in @a detected_cb callback.
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_inference_create()
+ * @pre Configure an inference handle by calling mv_inference_configure()
+ * @pre Prepare an inference by calling mv_inference_prepare()
+ * @post @a detected_cb will be called to provide detection results
+ *
+ * @see mv_inference_hand_detected_cb()
+ */
+int mv_inference_hand_detect(
+		mv_source_h source,
+		mv_inference_h infer,
+		mv_inference_hand_detected_cb detected_cb,
+		void *user_data);
+
/**
* @}
*/
std::vector<cv::Point> locations;
} PoseEstimationResults; /**< structure PoseEstimationResults */
+/**
+ * @brief Aggregated results of a hand detection inference run.
+ */
+typedef struct _HandDetectionResults {
+	int number_of_hands;              /**< Number of detected hands */
+	std::vector<float> confidences;   /**< Confidence value per detected hand */
+	std::vector<cv::Rect> locations;  /**< Bounding box per detected hand */
+} HandDetectionResults; /**< structure HandDetectionResults */
+
namespace mediavision {
namespace inference {
*/
int GetPoseEstimationDetectionResults(PoseEstimationResults* results);
+	/**
+	 * @brief Gets the HandDetectionResults
+	 *
+	 * @since_tizen 6.0
+	 * @return @c 0 on success, otherwise a negative error value
+	 */
+	int GetHandDetectionResults(HandDetectionResults *detectionResults);
+
int GetResults(std::vector<std::vector<int>>* dimInfo, std::vector<float*> *results);
mv_engine_config_h GetEngineConfig(void) { return engine_config; }
mv_inference_pose_estimation_detected_cb detected_cb,
void *user_data);
+/**
+ * @brief Performs hand detection on the @a source
+ * @details Use this function to launch hand detection.
+ *          Each time mv_inference_hand_detect_open() is
+ *          called, @a detected_cb will receive a list of hands and their locations
+ *          on the media source.
+ *
+ * @since_tizen 6.0
+ *
+ * @param [in] source        The handle to the source of the media
+ * @param [in] infer         The handle to the inference
+ * @param [in] detected_cb   The callback which will be called for
+ *                           detecting hands on media source.
+ *                           This callback will receive the detection results.
+ * @param [in] user_data     The user data passed from the code where
+ *                           @ref mv_inference_hand_detect() is invoked. This data will
+ *                           be accessible from @a detected_cb callback.
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ *                                                  isn't supported
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ *
+ * @pre Create a source handle by calling @ref mv_create_source()
+ * @pre Create an inference handle by calling @ref mv_inference_create()
+ * @pre Configure an inference handle by calling @ref mv_inference_configure()
+ * @pre Prepare an inference by calling @ref mv_inference_prepare()
+ * @post @a detected_cb will be called to process detection results
+ *
+ * @see mv_inference_hand_detected_cb
+ */
+int mv_inference_hand_detect_open(
+		mv_source_h source,
+		mv_inference_h infer,
+		mv_inference_hand_detected_cb detected_cb,
+		void *user_data);
+
+
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
return MEDIA_VISION_ERROR_NONE;
}
+/**
+ * @brief Extracts hand detection results from the latest inference output.
+ *
+ * Reads the backend output tensors, decodes bounding boxes/scores, filters
+ * detections below mThreshold and fills @a detectionResults.
+ *
+ * @param[out] detectionResults Filled with the filtered hand detections
+ * @return @c MEDIA_VISION_ERROR_NONE on success, otherwise a negative error value
+ */
+int Inference::GetHandDetectionResults(HandDetectionResults *detectionResults)
+{
+	tensor_t outputData;
+
+	// Get inference result and contain it in outputData.
+	int ret = FillOutputResult(outputData);
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to get output result.");
+		return ret;
+	}
+
+	// In case of hand detection,
+	// a model may apply post-process but others may not.
+	// Thus, those cases should be handled separately.
+	std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+	LOGI("inferDimInfo size: %zu", outputData.dimInfo.size());
+
+	std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
+	LOGI("inferResults size: %zu", inferResults.size());
+
+	float* boxes = nullptr;
+	float* classes = nullptr;
+	float* scores = nullptr;
+	int number_of_detections = 0;
+
+	cv::Mat cvScores, cvClasses, cvBoxes;
+	if (outputData.dimInfo.size() == 1) {
+		// There is no way to know how many objects were detected unless the
+		// number of objects is provided. In that case, each backend should
+		// provide the number of results manually. For example, in OpenCV,
+		// MobilenetV1-SSD doesn't provide it, so the number of objects is
+		// written to the 1st element, i.e., outputData.data[0] (the shape is
+		// 1x1xNx7 and the 1st of 7 indicates the image id, which is useless
+		// if a batch mode isn't supported). So, use the 1st of 7.
+
+		number_of_detections = (int)(*reinterpret_cast<float*>(outputData.data[0]));
+		cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], CV_32F, outputData.data[0]);
+
+		// boxes
+		cv::Mat cvLeft = cvOutputData.col(3).clone();
+		cv::Mat cvTop = cvOutputData.col(4).clone();
+		cv::Mat cvRight = cvOutputData.col(5).clone();
+		cv::Mat cvBottom = cvOutputData.col(6).clone();
+
+		cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
+		cv::hconcat(cvBoxElems, 4, cvBoxes);
+
+		// classes
+		cvClasses = cvOutputData.col(1).clone();
+
+		// scores
+		cvScores = cvOutputData.col(2).clone();
+
+		boxes = cvBoxes.ptr<float>(0);
+		classes = cvClasses.ptr<float>(0);
+		scores = cvScores.ptr<float>(0);
+
+	} else {
+		// Post-processed output: separate tensors for boxes, classes,
+		// scores, and the detection count.
+		boxes = reinterpret_cast<float*>(inferResults[0]);
+		classes = reinterpret_cast<float*>(inferResults[1]);
+		scores = reinterpret_cast<float*>(inferResults[2]);
+		number_of_detections = (int)(*reinterpret_cast<float*>(inferResults[3]));
+	}
+
+	int left, top, right, bottom;
+	cv::Rect loc;
+
+	HandDetectionResults results;
+	results.number_of_hands = 0;
+	for (int idx = 0; idx < number_of_detections; ++idx) {
+		if (scores[idx] < mThreshold)
+			continue;
+
+		// Boxes are normalized [top, left, bottom, right]; scale to pixels.
+		left = (int)(boxes[idx*4 + 1] * mSourceSize.width);
+		top = (int)(boxes[idx*4 + 0] * mSourceSize.height);
+		right = (int)(boxes[idx*4 + 3] * mSourceSize.width);
+		bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height);
+
+		loc.x = left;
+		loc.y = top;
+		loc.width = right - left + 1;
+		loc.height = bottom - top + 1;
+
+		results.confidences.push_back(scores[idx]);
+		results.locations.push_back(loc);
+		results.number_of_hands++;
+
+		LOGI("confidence:%f", scores[idx]);
+		LOGI("class: %f", classes[idx]);
+		LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx*4 + 1], boxes[idx*4 + 0], boxes[idx*4 + 3], boxes[idx*4 + 2]);
+		LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
+	}
+
+	*detectionResults = results;
+	// Informational summary, not an error: log with LOGI.
+	LOGI("Inference: GetHandDetectionResults: %d\n", results.number_of_hands);
+	return MEDIA_VISION_ERROR_NONE;
+}
+
+
} /* Inference */
} /* MediaVision */
return ret;
#endif
+}
+
+/**
+ * @brief Public entry point for hand detection; dispatches to the
+ *        licence or open implementation depending on the build.
+ */
+int mv_inference_hand_detect(
+		mv_source_h source,
+		mv_inference_h infer,
+		mv_inference_hand_detected_cb detected_cb,
+		void *user_data)
+{
+	/* NOTE(review): this reuses the *face* feature check — confirm whether a
+	 * dedicated hand (or generic inference) feature key should be used. */
+	MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported());
+	MEDIA_VISION_INSTANCE_CHECK(source);
+	MEDIA_VISION_INSTANCE_CHECK(infer);
+	MEDIA_VISION_NULL_ARG_CHECK(detected_cb);
+
+	MEDIA_VISION_FUNCTION_ENTER();
+
+	int ret = MEDIA_VISION_ERROR_NONE;
+
+#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT
+	/*
+	ret = mv_inference_hand_detect_lic(source, infer, detected_cb, user_data);
+	*/
+#else
+	ret = mv_inference_hand_detect_open(source, infer, detected_cb, user_data);
+#endif
+
+	/* LEAVE/return moved outside the #ifdef so the licence-port build
+	 * doesn't fall off the end of the function without returning. */
+	MEDIA_VISION_FUNCTION_LEAVE();
+
+	return ret;
}
\ No newline at end of file
return ret;
}
+
+/**
+ * @brief Open implementation of hand detection: runs the inference on
+ *        @a source and reports the results through @a detected_cb.
+ */
+int mv_inference_hand_detect_open(
+		mv_source_h source,
+		mv_inference_h infer,
+		mv_inference_hand_detected_cb detected_cb,
+		void *user_data)
+{
+	Inference *inferenceHandle = static_cast<Inference *>(infer);
+
+	std::vector<mv_source_h> inputSources;
+	inputSources.push_back(source);
+
+	// No ROI rectangles: the inference runs over the whole source.
+	std::vector<mv_rectangle_s> roiRects;
+
+	int ret = inferenceHandle->Run(inputSources, roiRects);
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to run inference");
+		return ret;
+	}
+
+	HandDetectionResults handDetectionResults;
+	ret = inferenceHandle->GetHandDetectionResults(&handDetectionResults);
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to get inference results");
+		return ret;
+	}
+
+	int numberOfOutputs = handDetectionResults.number_of_hands;
+
+	float *confidences = handDetectionResults.confidences.data();
+	std::vector<mv_rectangle_s> locations(numberOfOutputs);
+
+	// Convert each cv::Rect into the public mv_rectangle_s layout.
+	for (int n = 0; n < numberOfOutputs; ++n) {
+		const cv::Rect &box = handDetectionResults.locations[n];
+		locations[n].point.x = box.x;
+		locations[n].point.y = box.y;
+		locations[n].width = box.width;
+		locations[n].height = box.height;
+	}
+
+	detected_cb(source, numberOfOutputs, confidences, locations.data(), user_data);
+
+	return ret;
+}
\ No newline at end of file