2 * GStreamer hand gesture detection plugins
3 * Copyright (C) 2012 Andol Li <<andol@andol.info>>
4 * Copyright (C) 2013 Sreerenj Balachandran <sreerenj.balachandran@intel.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
24 * Alternatively, the contents of this file may be used under the
25 * GNU Lesser General Public License Version 2.1 (the "LGPL"), in
26 * which case the following provisions apply instead of the ones
29 * This library is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU Library General Public
31 * License as published by the Free Software Foundation; either
32 * version 2 of the License, or (at your option) any later version.
34 * This library is distributed in the hope that it will be useful,
35 * but WITHOUT ANY WARRANTY; without even the implied warranty of
36 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
37 * Library General Public License for more details.
39 * You should have received a copy of the GNU Library General Public
40 * License along with this library; if not, write to the
41 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
42 * Boston, MA 02111-1307, USA.
45 * SECTION:video-filter-handdetect
47 * FIXME: performs hand gesture detection on video streams and images,
48 * and enables media operations, e.g. play/stop/fast forward/rewind.
50 * ## Example launch line
53 * gst-launch-1.0 autovideosrc ! videoconvert ! "video/x-raw, format=RGB, width=320, height=240" ! \
54 * videoscale ! handdetect ! videoconvert ! xvimagesink
63 #include "gsthanddetect.h"
64 #include <opencv2/imgproc.hpp>
66 GST_DEBUG_CATEGORY_STATIC (gst_handdetect_debug);
67 #define GST_CAT_DEFAULT gst_handdetect_debug
68 #if (CV_MAJOR_VERSION < 4)
69 #define CASCADE_DO_CANNY_PRUNING CV_HAAR_DO_CANNY_PRUNING
72 /* define HAAR files */
73 #define HAAR_FILE_FIST GST_HAAR_CASCADES_DIR G_DIR_SEPARATOR_S "fist.xml"
74 #define HAAR_FILE_PALM GST_HAAR_CASCADES_DIR G_DIR_SEPARATOR_S "palm.xml"
78 /* Filter signals and args */
97 /* the capabilities of the inputs and outputs */
// Sink pad template: caps are built with GST_VIDEO_CAPS_MAKE ("RGB").
98 static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
101 GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("RGB"))
// Source pad template: same RGB caps as the sink side.
103 static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
106 GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("RGB"))
// Forward declarations: GObject property accessors.
109 static void gst_handdetect_set_property (GObject * object, guint prop_id,
110 const GValue * value, GParamSpec * pspec);
111 static void gst_handdetect_get_property (GObject * object, guint prop_id,
112 GValue * value, GParamSpec * pspec);
// Forward declarations: the two hooks assigned to the OpenCV video filter
// class in gst_handdetect_class_init (cv_set_caps and cv_trans_ip_func).
113 static gboolean gst_handdetect_set_caps (GstOpencvVideoFilter * transform,
114 gint in_width, gint in_height, int in_cv_type,
115 gint out_width, gint out_height, int out_cv_type);
116 static GstFlowReturn gst_handdetect_transform_ip (GstOpencvVideoFilter *
117 transform, GstBuffer * buffer, Mat img);
// Forward declaration: cascade loader used by init and set_property.
119 static CascadeClassifier *gst_handdetect_load_profile (GstHanddetect * filter,
// Forward declarations: GstNavigation interface glue.
122 static void gst_handdetect_navigation_interface_init (GstNavigationInterface *
124 static void gst_handdetect_navigation_send_event (GstNavigation * navigation,
125 GstStructure * structure);
// Type definition: GstHanddetect derives from GST_TYPE_OPENCV_VIDEO_FILTER,
// implements GST_TYPE_NAVIGATION, and initialises the "handdetect" debug
// category as part of type registration.
127 G_DEFINE_TYPE_WITH_CODE (GstHanddetect, gst_handdetect,
128 GST_TYPE_OPENCV_VIDEO_FILTER,
129 G_IMPLEMENT_INTERFACE (GST_TYPE_NAVIGATION,
130 gst_handdetect_navigation_interface_init);
131 GST_DEBUG_CATEGORY_INIT (gst_handdetect_debug,
132 "handdetect", 0, "opencv hand gesture detection"));
// Element registration under the name "handdetect" with GST_RANK_NONE.
133 GST_ELEMENT_REGISTER_DEFINE (handdetect, "handdetect", GST_RANK_NONE,
134 GST_TYPE_HANDDETECT);
// GstNavigation interface initialiser (referenced from the
// G_IMPLEMENT_INTERFACE call above): points the interface's send_event
// slot at this element's handler.
137 gst_handdetect_navigation_interface_init (GstNavigationInterface * iface)
139 iface->send_event = gst_handdetect_navigation_send_event;
142 /* FIXME: this function used to parse the region of interests coordinates
143 * sending from applications when the hand gestures reach the defined regions of interests,
144 * at this moment this function is not doing anything significantly
145 * but will be CHANGED when the gstreamer is patched with new hand gesture events
148 gst_handdetect_navigation_send_event (GstNavigation * navigation,
149 GstStructure * structure)
151 GstHanddetect *filter = GST_HANDDETECT (navigation);
154 if ((peer = gst_pad_get_peer (GST_BASE_TRANSFORM_CAST (filter)->sinkpad))) {
156 event = gst_event_new_navigation (structure);
157 gst_pad_send_event (peer, event);
158 gst_object_unref (peer);
162 /* clean opencv images and parameters */
164 gst_handdetect_finalize (GObject * obj)
166 GstHanddetect *filter = GST_HANDDETECT (obj);
168 filter->cvGray.release ();
169 g_free (filter->profile_fist);
170 g_free (filter->profile_palm);
171 delete (filter->best_r);
172 if (filter->cvCascade_fist)
173 delete filter->cvCascade_fist;
174 if (filter->cvCascade_palm)
175 delete filter->cvCascade_palm;
177 G_OBJECT_CLASS (gst_handdetect_parent_class)->finalize (obj);
180 /* initialise the HANDDETECT class */
// Class initialiser: wires the OpenCV filter hooks and the GObject vfuncs,
// installs the element's properties, then sets static metadata and
// registers both pad templates.
182 gst_handdetect_class_init (GstHanddetectClass * klass)
184 GObjectClass *gobject_class;
185 GstOpencvVideoFilterClass *gstopencvbasefilter_class;
187 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
// The same class structure is viewed through each of its parent class types.
188 gobject_class = (GObjectClass *) klass;
189 gstopencvbasefilter_class = (GstOpencvVideoFilterClass *) klass;
// Hooks of the OpenCV video filter base class.
191 gstopencvbasefilter_class->cv_trans_ip_func = gst_handdetect_transform_ip;
192 gstopencvbasefilter_class->cv_set_caps = gst_handdetect_set_caps;
// GObject lifecycle and property accessors.
194 gobject_class->finalize = GST_DEBUG_FUNCPTR (gst_handdetect_finalize);
195 gobject_class->set_property = gst_handdetect_set_property;
196 gobject_class->get_property = gst_handdetect_get_property;
// "display": read/write boolean, default TRUE.
198 g_object_class_install_property (gobject_class,
200 g_param_spec_boolean ("display",
202 "Whether the detected hands are highlighted in output frame",
203 TRUE, (GParamFlags) G_PARAM_READWRITE)
// "profile_fist": read/write string, declared default HAAR_FILE_FIST.
// NOTE(review): gst_handdetect_init can take the initial path from the
// GST_HAAR_CASCADES_PATH environment variable instead, so the declared
// default and the real initial value may differ - confirm.
205 g_object_class_install_property (gobject_class,
207 g_param_spec_string ("profile_fist",
209 "Location of HAAR cascade file (fist gesture)",
210 HAAR_FILE_FIST, (GParamFlags) G_PARAM_READWRITE)
// "profile_palm": read/write string, declared default HAAR_FILE_PALM.
212 g_object_class_install_property (gobject_class,
214 g_param_spec_string ("profile_palm",
216 "Location of HAAR cascade file (palm gesture)",
217 HAAR_FILE_PALM, (GParamFlags) G_PARAM_READWRITE)
// The four ROI_* properties below are read/write ints in [0, INT_MAX] with
// default 0.
219 /* FIXME: property name needs fixing */
220 g_object_class_install_property (gobject_class,
222 g_param_spec_int ("ROI_X",
224 "X of left-top pointer in region of interest \nGestures in the defined region of interest will emit messages",
225 0, INT_MAX, 0, (GParamFlags) G_PARAM_READWRITE)
227 /* FIXME: property name needs fixing */
228 g_object_class_install_property (gobject_class,
230 g_param_spec_int ("ROI_Y",
232 "Y of left-top pointer in region of interest \nGestures in the defined region of interest will emit messages",
233 0, INT_MAX, 0, (GParamFlags) G_PARAM_READWRITE)
235 /* FIXME: property name needs fixing */
// NOTE(review): the ROI_WIDTH and ROI_HEIGHT descriptions say "of left-top
// pointer", which looks copied from ROI_X/ROI_Y - confirm intended wording.
236 g_object_class_install_property (gobject_class,
238 g_param_spec_int ("ROI_WIDTH",
240 "WIDTH of left-top pointer in region of interest \nGestures in the defined region of interest will emit messages",
241 0, INT_MAX, 0, (GParamFlags) G_PARAM_READWRITE)
243 /* FIXME: property name needs fixing */
244 g_object_class_install_property (gobject_class,
246 g_param_spec_int ("ROI_HEIGHT",
248 "HEIGHT of left-top pointer in region of interest \nGestures in the defined region of interest will emit messages",
249 0, INT_MAX, 0, (GParamFlags) G_PARAM_READWRITE)
// Static element metadata: classification, description and author are the
// strings visible below.
252 gst_element_class_set_static_metadata (element_class,
254 "Filter/Effect/Video",
255 "Performs hand gesture detection on videos, providing detected hand positions via bus message and navigation event, and deals with hand gesture events",
256 "Andol Li <andol@andol.info>");
// Register the file-scope src and sink pad templates.
258 gst_element_class_add_static_pad_template (element_class, &src_factory);
259 gst_element_class_add_static_pad_template (element_class, &sink_factory);
263 /* initialise the new element:
264 * choose the HAAR cascade file locations
265 * set region-of-interest and display defaults
266 * load both cascade classifiers
// Instance initialiser for GstHanddetect.
269 gst_handdetect_init (GstHanddetect * filter)
271 const gchar *haar_path;
// GST_HAAR_CASCADES_PATH can supply the directory holding fist.xml and
// palm.xml; the compiled-in HAAR_FILE_FIST / HAAR_FILE_PALM paths are the
// alternative. The condition choosing between the two pairs of assignments
// is not visible here - presumably whether the variable is set; confirm.
273 haar_path = g_getenv ("GST_HAAR_CASCADES_PATH");
275 filter->profile_fist = g_build_filename (haar_path, "fist.xml", NULL);
276 filter->profile_palm = g_build_filename (haar_path, "palm.xml", NULL);
278 filter->profile_fist = g_strdup (HAAR_FILE_FIST);
279 filter->profile_palm = g_strdup (HAAR_FILE_PALM);
// Defaults: zero-sized region of interest, marker drawing enabled.
284 filter->roi_width = 0;
285 filter->roi_height = 0;
286 filter->display = TRUE;
// Load both classifiers from the paths chosen above. transform_ip tests
// both pointers before running any detection.
288 filter->cvCascade_fist =
289 gst_handdetect_load_profile (filter, filter->profile_fist);
290 filter->cvCascade_palm =
291 gst_handdetect_load_profile (filter, filter->profile_palm);
// Configure the base class in-place mode (the flag argument is not visible
// in this excerpt).
293 gst_opencv_video_filter_set_in_place (GST_OPENCV_VIDEO_FILTER_CAST (filter),
// GObject set_property: stores the new value on the instance. Changing a
// profile path also replaces the matching cascade classifier.
298 gst_handdetect_set_property (GObject * object, guint prop_id,
299 const GValue * value, GParamSpec * pspec)
301 GstHanddetect *filter = GST_HANDDETECT (object);
// Fist profile: free the old path and classifier, then load from the new
// path.
// NOTE(review): the duplicated string is handed to the loader without a
// NULL check, and nothing visible here synchronises with transform_ip,
// which dereferences the same classifier pointer - confirm whether this
// property may be set to NULL or changed while streaming.
304 case PROP_PROFILE_FIST:
305 g_free (filter->profile_fist);
306 if (filter->cvCascade_fist)
307 delete filter->cvCascade_fist;
308 filter->profile_fist = g_value_dup_string (value);
309 filter->cvCascade_fist =
310 gst_handdetect_load_profile (filter, filter->profile_fist);
// Palm profile: same sequence as the fist profile, same caveats.
312 case PROP_PROFILE_PALM:
313 g_free (filter->profile_palm);
314 if (filter->cvCascade_palm)
315 delete filter->cvCascade_palm;
316 filter->profile_palm = g_value_dup_string (value);
317 filter->cvCascade_palm =
318 gst_handdetect_load_profile (filter, filter->profile_palm);
// Plain value properties: copied straight into the instance fields.
321 filter->display = g_value_get_boolean (value);
324 filter->roi_x = g_value_get_int (value);
327 filter->roi_y = g_value_get_int (value);
330 filter->roi_width = g_value_get_int (value);
332 case PROP_ROI_HEIGHT:
333 filter->roi_height = g_value_get_int (value);
// Unknown property id: emit the standard GObject warning.
336 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
// GObject get_property: copies the requested instance field into value.
342 gst_handdetect_get_property (GObject * object, guint prop_id, GValue * value,
345 GstHanddetect *filter = GST_HANDDETECT (object);
349 g_value_set_boolean (value, filter->display);
// Profile paths are returned as stored on the instance.
351 case PROP_PROFILE_FIST:
352 g_value_set_string (value, filter->profile_fist);
354 case PROP_PROFILE_PALM:
355 g_value_set_string (value, filter->profile_palm);
// Region-of-interest fields.
358 g_value_set_int (value, filter->roi_x);
361 g_value_set_int (value, filter->roi_y);
364 g_value_set_int (value, filter->roi_width);
366 case PROP_ROI_HEIGHT:
367 g_value_set_int (value, filter->roi_height);
// Unknown property id: emit the standard GObject warning.
370 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
375 /* GstElement vmethod implementations */
376 /* this function handles the link with other elements */
// Caps hook (installed as cv_set_caps in class_init). In the visible lines
// only in_width and in_height are used: a warning is logged when the input
// is not 320x240, and the grayscale buffer is sized to the input.
378 gst_handdetect_set_caps (GstOpencvVideoFilter * transform,
379 gint in_width, gint in_height, int in_cv_type,
380 gint out_width, gint out_height, int out_cv_type)
382 GstHanddetect *filter;
383 filter = GST_HANDDETECT (transform);
385 /* detection accuracy is best at 320 x 240; warn for any other input size */
// NOTE(review): the message ends with a newline inside the string literal;
// confirm whether that is wanted in a GST_WARNING_OBJECT message.
386 if (in_width != 320 || in_height != 240)
387 GST_WARNING_OBJECT (filter,
388 "resize to 320 x 240 to have best detect accuracy.\n");
// Single-channel 8-bit image, later filled by cvtColor in transform_ip.
390 filter->cvGray.create (Size (in_width, in_height), CV_8UC1);
395 /* Hand detection function
396 * This function does the actual processing 'of hand detect and display'
// Frame hook installed as cv_trans_ip_func in class_init. Visible steps:
// convert img to grayscale, run the fist cascade, keep the detection closest
// to the previously reported hand, report it when it lies in the region of
// interest, and mark it in img when display is set. A second section
// repeats the same steps with the palm cascade.
399 gst_handdetect_transform_ip (GstOpencvVideoFilter * transform,
400 GstBuffer * buffer, Mat img)
402 GstHanddetect *filter = GST_HANDDETECT (transform);
407 vector < Rect > hands;
409 /* check detection cascades */
// No detection is attempted unless both classifiers are present.
410 if (filter->cvCascade_fist && filter->cvCascade_palm) {
411 /* cvt to gray colour space for hand detect */
412 cvtColor (img, filter->cvGray, COLOR_RGB2GRAY);
414 /* detect FIST gesture fist */
// roi spans the whole grayscale image (0, 0, width, height); it is not
// derived from the ROI_* properties.
415 Mat roi (filter->cvGray, Rect (0,
416 0, filter->cvGray.size ().width, filter->cvGray.size ().height));
417 filter->cvCascade_fist->detectMultiScale (roi, hands, 1.1, 2,
418 CASCADE_DO_CANNY_PRUNING, Size (24, 24), Size (0, 0));
420 /* if FIST gesture detected */
421 if (!hands.empty ()) {
423 int min_distance, distance;
427 /* Go through all detected FIST gestures to get the best one
428 * prev_r => previous hand
429 * best_r => best hand in this frame
431 /* set min_distance for init comparison */
432 min_distance = img.size ().width + img.size ().height;
433 /* Init filter->prev_r */
// NOTE(review): temp_r's declaration is not visible here; if it is a local
// of this block, prev_r holds its address only until prev_r is reassigned
// to best_r after the loop - confirm no exit path leaves it dangling.
434 temp_r = Rect (0, 0, 0, 0);
435 if (filter->prev_r == NULL)
436 filter->prev_r = &temp_r;
437 /* Get the best FIST gesture */
// Picks the detection whose top-left corner is nearest to prev_r's; with
// the <= test a later detection wins a tie.
// NOTE(review): after the first processed frame prev_r is the same pointer
// as best_r (see the assignment after this loop). The delete below then
// frees the object prev_r points to, and any further iteration reads
// prev_r->x / prev_r->y from freed memory. Keeping the previous rectangle
// by value would avoid this.
438 for (i = 0; i < hands.size (); i++) {
440 distance = (int) sqrt (pow ((r->x - filter->prev_r->x),
441 2) + pow ((r->y - filter->prev_r->y), 2));
442 if (distance <= min_distance) {
443 min_distance = distance;
444 delete (filter->best_r);
445 filter->best_r = new Rect (*r);
448 /* Save best_r as prev_r for next frame comparison */
449 filter->prev_r = filter->best_r;
451 /* send msg to app/bus if the detected gesture falls in the region of interest */
452 /* get center point of gesture */
453 c = Point (filter->best_r->x + filter->best_r->width / 2,
454 filter->best_r->y + filter->best_r->height / 2);
456 * if the center point is in the region of interest, OR,
457 * if the region of interest remains default as (0,0,0,0)*/
// NOTE(review): roi_x + roi_width and roi_y + roi_height are plain int sums
// of properties that may each be as large as INT_MAX.
458 if ((c.x >= filter->roi_x && c.x <= (filter->roi_x + filter->roi_width)
459 && c.y >= filter->roi_y
460 && c.y <= (filter->roi_y + filter->roi_height))
461 || (filter->roi_x == 0
462 && filter->roi_y == 0
463 && filter->roi_width == 0 && filter->roi_height == 0)) {
464 /* Define structure for message post */
// "hand-gesture" structure; fields visible in this excerpt: gesture, y,
// width, height. The coordinates are the centre of best_r.
465 s = gst_structure_new ("hand-gesture",
466 "gesture", G_TYPE_STRING, "fist",
468 (gint) (filter->best_r->x + filter->best_r->width * 0.5), "y",
470 (gint) (filter->best_r->y + filter->best_r->height * 0.5), "width",
471 G_TYPE_INT, (gint) filter->best_r->width, "height", G_TYPE_INT,
472 (gint) filter->best_r->height, NULL);
473 /* Init message element */
474 m = gst_message_new_element (GST_OBJECT (filter), s);
476 gst_element_post_message (GST_ELEMENT (filter), m);
480 * here we use mouse-move event instead of fist-move or palm-move event
481 * !!! this will CHANGE in the future !!!
482 * !!! by adding gst_navigation_send_hand_detect_event() in navigation.c !!!
// NOTE(review): the lines around this call are missing from the excerpt, so
// it cannot be told from here whether the call is compiled in - confirm.
484 gst_navigation_send_mouse_event (GST_NAVIGATION (filter),
487 (double) (filter->best_r->x + filter->best_r->width * 0.5),
488 (double) (filter->best_r->y + filter->best_r->height * 0.5));
492 /* Check filter->display,
493 * If TRUE, displaying red circle marker in the out frame */
// Circle centred on best_r with radius (width + height) / 4, thickness 1.
494 if (filter->display) {
497 center.x = cvRound ((filter->best_r->x + filter->best_r->width * 0.5));
498 center.y = cvRound ((filter->best_r->y + filter->best_r->height * 0.5));
500 cvRound ((filter->best_r->width + filter->best_r->height) * 0.25);
501 circle (img, center, radius, CV_RGB (0, 0, 200), 1, 8, 0);
504 /* if NO FIST gesture, detecting PALM gesture */
// Palm section: same roi image and detector parameters as the fist pass.
505 filter->cvCascade_palm->detectMultiScale (roi, hands, 1.1, 2,
506 CASCADE_DO_CANNY_PRUNING, Size (24, 24), Size (0, 0));
507 /* if PALM detected */
508 if (!hands.empty ()) {
509 int min_distance, distance;
// NOTE(review): this debug line is gated on the display property rather
// than being logged unconditionally - confirm that is intended.
513 if (filter->display) {
514 GST_DEBUG_OBJECT (filter, "%d PALM gestures detected",
515 (int) hands.size ());
517 /* Go through all detected PALM gestures to get the best one
518 * prev_r => previous hand
519 * best_r => best hand in this frame
521 /* suppose a min_distance for init comparison */
522 min_distance = img.size ().width + img.size ().height;
523 /* Init filter->prev_r */
524 temp_r = Rect (0, 0, 0, 0);
525 if (filter->prev_r == NULL)
526 filter->prev_r = &temp_r;
527 /* Get the best PALM gesture */
// NOTE(review): same selection logic as the fist loop above, and the same
// hazard - deleting best_r frees the object prev_r refers to.
528 for (i = 0; i < hands.size (); ++i) {
530 distance = (int) sqrt (pow ((r->x - filter->prev_r->x),
531 2) + pow ((r->y - filter->prev_r->y), 2));
532 if (distance <= min_distance) {
533 min_distance = distance;
534 delete (filter->best_r);
535 filter->best_r = new Rect (*r);
538 /* Save best_r as prev_r for next frame comparison */
539 filter->prev_r = filter->best_r;
541 /* send msg to app/bus if the detected gesture falls in the region of interest */
542 /* get center point of gesture */
543 c = Point (filter->best_r->x + filter->best_r->width / 2,
544 filter->best_r->y + filter->best_r->height / 2);
546 * if the center point is in the region of interest, OR,
547 * if the region of interest remains default as (0,0,0,0)*/
// Same region-of-interest test as the fist section, written with gint casts.
548 if (((gint) c.x >= filter->roi_x
549 && (gint) c.x <= (filter->roi_x + filter->roi_width)
550 && (gint) c.y >= filter->roi_y
551 && (gint) c.y <= (filter->roi_y + filter->roi_height))
552 || (filter->roi_x == 0 && filter->roi_y == 0
553 && filter->roi_width == 0 && filter->roi_height == 0)) {
554 /* Define structure for message post */
// Same "hand-gesture" structure as the fist section, with gesture "palm".
555 s = gst_structure_new ("hand-gesture",
556 "gesture", G_TYPE_STRING, "palm",
558 (gint) (filter->best_r->x + filter->best_r->width * 0.5), "y",
560 (gint) (filter->best_r->y + filter->best_r->height * 0.5),
561 "width", G_TYPE_INT, (gint) filter->best_r->width, "height",
562 G_TYPE_INT, (gint) filter->best_r->height, NULL);
563 /* Init message element */
564 m = gst_message_new_element (GST_OBJECT (filter), s);
566 gst_element_post_message (GST_ELEMENT (filter), m);
570 * here we use mouse-move event instead of fist-move or palm-move event
571 * !!! this will CHANGE in the future !!!
572 * !!! by adding gst_navigation_send_hand_detect_event() in navigation.c !!!
// NOTE(review): as in the fist section, it is not visible here whether this
// call is compiled in - confirm.
574 gst_navigation_send_mouse_event (GST_NAVIGATION (filter),
577 (double) (filter->best_r->x + filter->best_r->width * 0.5),
578 (double) (filter->best_r->y + filter->best_r->height * 0.5));
580 /* or use another way to send upstream navigation event for debug
583 * gst_event_new_navigation (gst_structure_new
584 * ("application/x-gst-navigation", "event", G_TYPE_STRING,
586 * "button", G_TYPE_INT, 0,
587 * "pointer_x", G_TYPE_DOUBLE,
588 * (double) (filter->best_r->x + filter->best_r->width * 0.5),
589 * "pointer_y", G_TYPE_DOUBLE,
590 * (double) (filter->best_r->y + filter->best_r->height * 0.5),
592 * gst_pad_send_event (GST_BASE_TRANSFORM_CAST (filter)->srcpad, event);
596 /* Check filter->display,
597 * If TRUE, displaying red circle marker in the out frame */
// Same marker as in the fist section: centre of best_r, radius
// (width + height) / 4, thickness 1.
598 if (filter->display) {
602 cvRound ((filter->best_r->x + filter->best_r->width * 0.5));
604 cvRound ((filter->best_r->y + filter->best_r->height * 0.5));
606 cvRound ((filter->best_r->width + filter->best_r->height) * 0.25);
607 circle (img, center, radius, CV_RGB (0, 0, 200), 1, 8, 0);
613 /* Push out the incoming buffer */
617 static CascadeClassifier *
618 gst_handdetect_load_profile (GstHanddetect * filter, gchar * profile)
620 CascadeClassifier *cascade;
622 cascade = new CascadeClassifier (profile);
623 if (cascade->empty ()) {
624 GST_ERROR_OBJECT (filter, "Invalid profile file: %s", profile);