Imported Upstream version 0.7.5
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <stdbool.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <poll.h>
38 #include <linux/types.h>
39 #include <linux/netlink.h>
40 #include <pthread.h>
41 #include <sys/mman.h>
42 #include <sys/time.h>
43 #include <libudev.h>
44 #include <errno.h>
45
46 #include "memory.h"
47 #include "debug.h"
48 #include "list.h"
49 #include "uevent.h"
50 #include "vector.h"
51 #include "structs.h"
52 #include "util.h"
53 #include "config.h"
54 #include "blacklist.h"
55 #include "devmapper.h"
56
/* Maximum number of uevents accumulated in one burst before forwarding. */
#define MAX_ACCUMULATION_COUNT 2048
/*
 * Maximum duration of one burst, in milliseconds. Parenthesized so the
 * macro stays a single value when used inside a larger expression.
 */
#define MAX_ACCUMULATION_TIME (30 * 1000)
/* Minimum event rate (events per second) for a burst to be continued. */
#define MIN_BURST_SPEED 10
60
/* Signature of the callback invoked by service_uevq() for each queued uevent. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Queue of received uevents waiting to be serviced by uevent_dispatch(). */
LIST_HEAD(uevq);
/* Mutex taken around accesses to uevq, and the pointer it is used through. */
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;
/* Condition signalled by the listeners after they append to uevq. */
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;
/* Callback and its opaque argument; both are set by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void * my_trigger_data;
/* Set to 1 by uevent_dispatch() before it handles a batch, 0 before it waits. */
int servicing_uev;
71
72 int is_uevent_busy(void)
73 {
74         int empty;
75
76         pthread_mutex_lock(uevq_lockp);
77         empty = list_empty(&uevq);
78         pthread_mutex_unlock(uevq_lockp);
79         return (!empty || servicing_uev);
80 }
81
82 struct uevent * alloc_uevent (void)
83 {
84         struct uevent *uev = MALLOC(sizeof(struct uevent));
85
86         if (uev) {
87                 INIT_LIST_HEAD(&uev->node);
88                 INIT_LIST_HEAD(&uev->merge_node);
89         }
90
91         return uev;
92 }
93
94 void
95 uevq_cleanup(struct list_head *tmpq)
96 {
97         struct uevent *uev, *tmp;
98
99         list_for_each_entry_safe(uev, tmp, tmpq, node) {
100                 list_del_init(&uev->node);
101
102                 if (uev->udev)
103                         udev_device_unref(uev->udev);
104                 FREE(uev);
105         }
106 }
107
108 static const char* uevent_get_env_var(const struct uevent *uev,
109                                       const char *attr)
110 {
111         int i, len;
112         const char *p = NULL;
113
114         if (attr == NULL)
115                 goto invalid;
116
117         len = strlen(attr);
118         if (len == 0)
119                 goto invalid;
120
121         for (i = 0; uev->envp[i] != NULL; i++) {
122                 const char *var = uev->envp[i];
123
124                 if (strlen(var) > len &&
125                     !memcmp(var, attr, len) && var[len] == '=') {
126                         p = var + len + 1;
127                         break;
128                 }
129         }
130
131         condlog(4, "%s: %s -> '%s'", __func__, attr, p);
132         return p;
133
134 invalid:
135         condlog(2, "%s: empty variable name", __func__);
136         return NULL;
137 }
138
/*
 * Read an environment variable of a uevent as a non-negative decimal int.
 *
 * @uev:  uevent to query
 * @attr: name of the variable
 *
 * Returns the parsed value, or -1 if the variable is missing, empty,
 * has trailing garbage, is negative, or does not fit into an int.
 */
static int uevent_get_env_positive_int(const struct uevent *uev,
				       const char *attr)
{
	const char *p = uevent_get_env_var(uev, attr);
	char *q;
	long val;

	if (p == NULL || *p == '\0')
		return -1;

	errno = 0;
	val = strtol(p, &q, 10);
	/*
	 * Reject range errors and values that would change when narrowed
	 * to int, instead of silently truncating them.
	 */
	if (errno != 0 || *q != '\0' || val < 0 || val != (long)(int)val) {
		condlog(2, "%s: invalid %s: '%s'", __func__, attr, p);
		return -1;
	}
	return (int)val;
}
156
157 void
158 uevent_get_wwid(struct uevent *uev)
159 {
160         const char *uid_attribute;
161         const char *val;
162         struct config * conf;
163
164         conf = get_multipath_config();
165         uid_attribute = parse_uid_attribute_by_attrs(conf->uid_attrs, uev->kernel);
166         put_multipath_config(conf);
167
168         val = uevent_get_env_var(uev, uid_attribute);
169         if (val)
170                 uev->wwid = val;
171         FREE_CONST(uid_attribute);
172 }
173
174 bool
175 uevent_need_merge(void)
176 {
177         struct config * conf;
178         bool need_merge = false;
179
180         conf = get_multipath_config();
181         if (conf->uid_attrs)
182                 need_merge = true;
183         put_multipath_config(conf);
184
185         return need_merge;
186 }
187
188 bool
189 uevent_can_discard(struct uevent *uev)
190 {
191         struct config * conf;
192
193         /*
194          * do not filter dm devices by devnode
195          */
196         if (!strncmp(uev->kernel, "dm-", 3))
197                 return false;
198         /*
199          * filter paths devices by devnode
200          */
201         conf = get_multipath_config();
202         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
203                            uev->kernel) > 0) {
204                 put_multipath_config(conf);
205                 return true;
206         }
207         put_multipath_config(conf);
208
209         return false;
210 }
211
212 bool
213 uevent_can_filter(struct uevent *earlier, struct uevent *later)
214 {
215
216         /*
217          * filter earlier uvents if path has removed later. Eg:
218          * "add path1 |chang path1 |add path2 |remove path1"
219          * can filter as:
220          * "add path2 |remove path1"
221          * uevents "add path1" and "chang path1" are filtered out
222          */
223         if (!strcmp(earlier->kernel, later->kernel) &&
224                 !strcmp(later->action, "remove") &&
225                 strncmp(later->kernel, "dm-", 3)) {
226                 return true;
227         }
228
229         /*
230          * filter change uvents if add uevents exist. Eg:
231          * "change path1| add path1 |add path2"
232          * can filter as:
233          * "add path1 |add path2"
234          * uevent "chang path1" is filtered out
235          */
236         if (!strcmp(earlier->kernel, later->kernel) &&
237                 !strcmp(earlier->action, "change") &&
238                 !strcmp(later->action, "add") &&
239                 strncmp(later->kernel, "dm-", 3)) {
240                 return true;
241         }
242
243         return false;
244 }
245
246 bool
247 merge_need_stop(struct uevent *earlier, struct uevent *later)
248 {
249         /*
250          * dm uevent do not try to merge with left uevents
251          */
252         if (!strncmp(later->kernel, "dm-", 3))
253                 return true;
254
255         /*
256          * we can not make a jugement without wwid,
257          * so it is sensible to stop merging
258          */
259         if (!earlier->wwid || !later->wwid)
260                 return true;
261         /*
262          * uevents merging stoped
263          * when we meet an opposite action uevent from the same LUN to AVOID
264          * "add path1 |remove path1 |add path2 |remove path2 |add path3"
265          * to merge as "remove path1, path2" and "add path1, path2, path3"
266          * OR
267          * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
268          * to merge as "add path1, path2" and "remove path1, path2, path3"
269          * SO
270          * when we meet a non-change uevent from the same LUN
271          * with the same wwid and different action
272          * it would be better to stop merging.
273          */
274         if (!strcmp(earlier->wwid, later->wwid) &&
275             strcmp(earlier->action, later->action) &&
276             strcmp(earlier->action, "change") &&
277             strcmp(later->action, "change"))
278                 return true;
279
280         return false;
281 }
282
283 bool
284 uevent_can_merge(struct uevent *earlier, struct uevent *later)
285 {
286         /* merge paths uevents
287          * whose wwids exsit and are same
288          * and actions are same,
289          * and actions are addition or deletion
290          */
291         if (earlier->wwid && later->wwid &&
292             !strcmp(earlier->wwid, later->wwid) &&
293             !strcmp(earlier->action, later->action) &&
294             strncmp(earlier->action, "change", 6) &&
295             strncmp(earlier->kernel, "dm-", 3)) {
296                 return true;
297         }
298
299         return false;
300 }
301
302 void
303 uevent_prepare(struct list_head *tmpq)
304 {
305         struct uevent *uev, *tmp;
306
307         list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
308                 if (uevent_can_discard(uev)) {
309                         list_del_init(&uev->node);
310                         if (uev->udev)
311                                 udev_device_unref(uev->udev);
312                         FREE(uev);
313                         continue;
314                 }
315
316                 if (strncmp(uev->kernel, "dm-", 3) &&
317                     uevent_need_merge())
318                         uevent_get_wwid(uev);
319         }
320 }
321
322 void
323 uevent_filter(struct uevent *later, struct list_head *tmpq)
324 {
325         struct uevent *earlier, *tmp;
326
327         list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
328                 /*
329                  * filter unnessary earlier uevents
330                  * by the later uevent
331                  */
332                 if (uevent_can_filter(earlier, later)) {
333                         condlog(2, "uevent: %s-%s has filtered by uevent: %s-%s",
334                                 earlier->kernel, earlier->action,
335                                 later->kernel, later->action);
336
337                         list_del_init(&earlier->node);
338                         if (earlier->udev)
339                                 udev_device_unref(earlier->udev);
340                         FREE(earlier);
341                 }
342         }
343 }
344
345 void
346 uevent_merge(struct uevent *later, struct list_head *tmpq)
347 {
348         struct uevent *earlier, *tmp;
349
350         list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
351                 if (merge_need_stop(earlier, later))
352                         break;
353                 /*
354                  * merge earlier uevents to the later uevent
355                  */
356                 if (uevent_can_merge(earlier, later)) {
357                         condlog(2, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
358                                 earlier->action, earlier->kernel, earlier->wwid,
359                                 later->action, later->kernel, later->wwid);
360
361                         list_move(&earlier->node, &later->merge_node);
362                 }
363         }
364 }
365
366 void
367 merge_uevq(struct list_head *tmpq)
368 {
369         struct uevent *later;
370
371         uevent_prepare(tmpq);
372         list_for_each_entry_reverse(later, tmpq, node) {
373                 uevent_filter(later, tmpq);
374                 if(uevent_need_merge())
375                         uevent_merge(later, tmpq);
376         }
377 }
378
379 void
380 service_uevq(struct list_head *tmpq)
381 {
382         struct uevent *uev, *tmp;
383
384         list_for_each_entry_safe(uev, tmp, tmpq, node) {
385                 list_del_init(&uev->node);
386
387                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
388                         condlog(0, "uevent trigger error");
389
390                 uevq_cleanup(&uev->merge_node);
391
392                 if (uev->udev)
393                         udev_device_unref(uev->udev);
394                 FREE(uev);
395         }
396 }
397
/*
 * Cleanup handler pushed by uevent_listen(): drops the reference it
 * took on the udev context passed in 'arg'.
 */
static void uevent_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_listen() resources");
	udev_unref((struct udev *)arg);
}
405
/*
 * Cleanup handler pushed by uevent_listen(): releases the udev monitor
 * passed in 'arg'.
 */
static void monitor_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_monitor() resources");
	udev_monitor_unref((struct udev_monitor *)arg);
}
413
414 /*
415  * Service the uevent queue.
416  */
417 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
418                     void * trigger_data)
419 {
420         my_uev_trigger = uev_trigger;
421         my_trigger_data = trigger_data;
422
423         mlockall(MCL_CURRENT | MCL_FUTURE);
424
425         while (1) {
426                 LIST_HEAD(uevq_tmp);
427
428                 pthread_mutex_lock(uevq_lockp);
429                 servicing_uev = 0;
430                 /*
431                  * Condition signals are unreliable,
432                  * so make sure we only wait if we have to.
433                  */
434                 if (list_empty(&uevq)) {
435                         pthread_cond_wait(uev_condp, uevq_lockp);
436                 }
437                 servicing_uev = 1;
438                 list_splice_init(&uevq, &uevq_tmp);
439                 pthread_mutex_unlock(uevq_lockp);
440                 if (!my_uev_trigger)
441                         break;
442                 merge_uevq(&uevq_tmp);
443                 service_uevq(&uevq_tmp);
444         }
445         condlog(3, "Terminating uev service queue");
446         uevq_cleanup(&uevq);
447         return 0;
448 }
449
450 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
451 {
452         struct uevent *uev;
453         char *buffer;
454         size_t bufpos;
455         int i;
456         char *pos;
457
458         uev = alloc_uevent();
459         if (!uev) {
460                 condlog(1, "lost uevent, oom");
461                 return NULL;
462         }
463
464         if ((size_t)buflen > sizeof(buf)-1)
465                 buflen = sizeof(buf)-1;
466
467         /*
468          * Copy the shared receive buffer contents to buffer private
469          * to this uevent so we can immediately reuse the shared buffer.
470          */
471         memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
472         buffer = uev->buffer;
473         buffer[buflen] = '\0';
474
475         /* save start of payload */
476         bufpos = strlen(buffer) + 1;
477
478         /* action string */
479         uev->action = buffer;
480         pos = strchr(buffer, '@');
481         if (!pos) {
482                 condlog(3, "bad action string '%s'", buffer);
483                 FREE(uev);
484                 return NULL;
485         }
486         pos[0] = '\0';
487
488         /* sysfs path */
489         uev->devpath = &pos[1];
490
491         /* hotplug events have the environment attached - reconstruct envp[] */
492         for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
493                 int keylen;
494                 char *key;
495
496                 key = &buffer[bufpos];
497                 keylen = strlen(key);
498                 uev->envp[i] = key;
499                 /* Filter out sequence number */
500                 if (strncmp(key, "SEQNUM=", 7) == 0) {
501                         char *eptr;
502
503                         uev->seqnum = strtoul(key + 7, &eptr, 10);
504                         if (eptr == key + 7)
505                                 uev->seqnum = -1;
506                 }
507                 bufpos += keylen + 1;
508         }
509         uev->envp[i] = NULL;
510
511         condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
512                 uev->action, uev->devpath);
513         uev->kernel = strrchr(uev->devpath, '/');
514         if (uev->kernel)
515                 uev->kernel++;
516
517         /* print payload environment */
518         for (i = 0; uev->envp[i] != NULL; i++)
519                 condlog(5, "%s", uev->envp[i]);
520
521         return uev;
522 }
523
524 int failback_listen(void)
525 {
526         int sock;
527         struct sockaddr_nl snl;
528         struct sockaddr_un sun;
529         socklen_t addrlen;
530         int retval;
531         int rcvbufsz = 128*1024;
532         int rcvsz = 0;
533         int rcvszsz = sizeof(rcvsz);
534         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
535         const int feature_on = 1;
536         /*
537          * First check whether we have a udev socket
538          */
539         memset(&sun, 0x00, sizeof(struct sockaddr_un));
540         sun.sun_family = AF_LOCAL;
541         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
542         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
543
544         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
545         if (sock >= 0) {
546
547                 condlog(3, "reading events from udev socket.");
548
549                 /* the bind takes care of ensuring only one copy running */
550                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
551                 if (retval < 0) {
552                         condlog(0, "bind failed, exit");
553                         goto exit;
554                 }
555
556                 /* enable receiving of the sender credentials */
557                 retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
558                                     &feature_on, sizeof(feature_on));
559                 if (retval < 0) {
560                         condlog(0, "failed to enable credential passing, exit");
561                         goto exit;
562                 }
563
564         } else {
565                 /* Fallback to read kernel netlink events */
566                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
567                 snl.nl_family = AF_NETLINK;
568                 snl.nl_pid = getpid();
569                 snl.nl_groups = 0x01;
570
571                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
572                 if (sock == -1) {
573                         condlog(0, "error getting socket, exit");
574                         return 1;
575                 }
576
577                 condlog(3, "reading events from kernel.");
578
579                 /*
580                  * try to avoid dropping uevents, even so, this is not a guarantee,
581                  * but it does help to change the netlink uevent socket's
582                  * receive buffer threshold from the default value of 106,496 to
583                  * the maximum value of 262,142.
584                  */
585                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
586                                     sizeof(rcvbufsz));
587
588                 if (retval < 0) {
589                         condlog(0, "error setting receive buffer size for socket, exit");
590                         exit(1);
591                 }
592                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
593                 if (retval < 0) {
594                         condlog(0, "error setting receive buffer size for socket, exit");
595                         exit(1);
596                 }
597                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
598
599                 /* enable receiving of the sender credentials */
600                 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
601                                &feature_on, sizeof(feature_on)) < 0) {
602                         condlog(0, "error on enabling credential passing for socket");
603                         exit(1);
604                 }
605
606                 retval = bind(sock, (struct sockaddr *) &snl,
607                               sizeof(struct sockaddr_nl));
608                 if (retval < 0) {
609                         condlog(0, "bind failed, exit");
610                         goto exit;
611                 }
612         }
613
614         while (1) {
615                 size_t bufpos;
616                 ssize_t buflen;
617                 struct uevent *uev;
618                 struct msghdr smsg;
619                 struct iovec iov;
620                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
621                 struct cmsghdr *cmsg;
622                 struct ucred *cred;
623                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
624
625                 memset(buf, 0x00, sizeof(buf));
626                 iov.iov_base = &buf;
627                 iov.iov_len = sizeof(buf);
628                 memset (&smsg, 0x00, sizeof(struct msghdr));
629                 smsg.msg_iov = &iov;
630                 smsg.msg_iovlen = 1;
631                 smsg.msg_control = cred_msg;
632                 smsg.msg_controllen = sizeof(cred_msg);
633
634                 buflen = recvmsg(sock, &smsg, 0);
635                 if (buflen < 0) {
636                         if (errno != EINTR)
637                                 condlog(0, "error receiving message, errno %d", errno);
638                         continue;
639                 }
640
641                 cmsg = CMSG_FIRSTHDR(&smsg);
642                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
643                         condlog(3, "no sender credentials received, message ignored");
644                         continue;
645                 }
646
647                 cred = (struct ucred *)CMSG_DATA(cmsg);
648                 if (cred->uid != 0) {
649                         condlog(3, "sender uid=%d, message ignored", cred->uid);
650                         continue;
651                 }
652
653                 /* skip header */
654                 bufpos = strlen(buf) + 1;
655                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
656                         condlog(3, "invalid message length");
657                         continue;
658                 }
659
660                 /* check message header */
661                 if (strstr(buf, "@/") == NULL) {
662                         condlog(3, "unrecognized message header");
663                         continue;
664                 }
665                 if ((size_t)buflen > sizeof(buf)-1) {
666                         condlog(2, "buffer overflow for received uevent");
667                         buflen = sizeof(buf)-1;
668                 }
669
670                 uev = uevent_from_buffer(buf, buflen);
671                 if (!uev)
672                         continue;
673                 /*
674                  * Queue uevent and poke service pthread.
675                  */
676                 pthread_mutex_lock(uevq_lockp);
677                 list_add_tail(&uev->node, &uevq);
678                 pthread_cond_signal(uev_condp);
679                 pthread_mutex_unlock(uevq_lockp);
680         }
681
682 exit:
683         close(sock);
684         return 1;
685 }
686
687 struct uevent *uevent_from_udev_device(struct udev_device *dev)
688 {
689         struct uevent *uev;
690         int i = 0;
691         char *pos, *end;
692         struct udev_list_entry *list_entry;
693
694         uev = alloc_uevent();
695         if (!uev) {
696                 udev_device_unref(dev);
697                 condlog(1, "lost uevent, oom");
698                 return NULL;
699         }
700         pos = uev->buffer;
701         end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
702         udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
703                 const char *name, *value;
704                 int bytes;
705
706                 name = udev_list_entry_get_name(list_entry);
707                 if (!name)
708                         name = "(null)";
709                 value = udev_list_entry_get_value(list_entry);
710                 if (!value)
711                         value = "(null)";
712                 bytes = snprintf(pos, end - pos, "%s=%s", name, value);
713                 if (pos + bytes >= end) {
714                         condlog(2, "buffer overflow for uevent");
715                         break;
716                 }
717                 uev->envp[i] = pos;
718                 pos += bytes;
719                 *pos = '\0';
720                 pos++;
721                 if (strcmp(name, "DEVPATH") == 0)
722                         uev->devpath = uev->envp[i] + 8;
723                 if (strcmp(name, "ACTION") == 0)
724                         uev->action = uev->envp[i] + 7;
725                 i++;
726                 if (i == HOTPLUG_NUM_ENVP - 1)
727                         break;
728         }
729         uev->udev = dev;
730         uev->envp[i] = NULL;
731
732         condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
733         uev->kernel = strrchr(uev->devpath, '/');
734         if (uev->kernel)
735                 uev->kernel++;
736
737         /* print payload environment */
738         for (i = 0; uev->envp[i] != NULL; i++)
739                 condlog(5, "%s", uev->envp[i]);
740         return uev;
741 }
742
743 bool uevent_burst(struct timeval *start_time, int events)
744 {
745         struct timeval diff_time, end_time;
746         unsigned long speed;
747         unsigned long eclipse_ms;
748
749         if(events > MAX_ACCUMULATION_COUNT) {
750                 condlog(2, "burst got %u uevents, too much uevents, stopped", events);
751                 return false;
752         }
753
754         gettimeofday(&end_time, NULL);
755         timersub(&end_time, start_time, &diff_time);
756
757         eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
758
759         if (eclipse_ms == 0)
760                 return true;
761
762         if (eclipse_ms > MAX_ACCUMULATION_TIME) {
763                 condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
764                 return false;
765         }
766
767         speed = (events * 1000) / eclipse_ms;
768         if (speed > MIN_BURST_SPEED)
769                 return true;
770
771         return false;
772 }
773
774 int uevent_listen(struct udev *udev)
775 {
776         int err = 2;
777         struct udev_monitor *monitor = NULL;
778         int fd, socket_flags, events;
779         struct timeval start_time;
780         int need_failback = 1;
781         int timeout = 30;
782         LIST_HEAD(uevlisten_tmp);
783
784         /*
785          * Queue uevents for service by dedicated thread so that the uevent
786          * listening thread does not block on multipathd locks (vecs->lock)
787          * thereby not getting to empty the socket's receive buffer queue
788          * often enough.
789          */
790         if (!udev) {
791                 condlog(1, "no udev context");
792                 return 1;
793         }
794         udev_ref(udev);
795         pthread_cleanup_push(uevent_cleanup, udev);
796
797         monitor = udev_monitor_new_from_netlink(udev, "udev");
798         if (!monitor) {
799                 condlog(2, "failed to create udev monitor");
800                 goto out;
801         }
802         pthread_cleanup_push(monitor_cleanup, monitor);
803 #ifdef LIBUDEV_API_RECVBUF
804         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
805                 condlog(2, "failed to increase buffer size");
806 #endif
807         fd = udev_monitor_get_fd(monitor);
808         if (fd < 0) {
809                 condlog(2, "failed to get monitor fd");
810                 goto out;
811         }
812         socket_flags = fcntl(fd, F_GETFL);
813         if (socket_flags < 0) {
814                 condlog(2, "failed to get monitor socket flags : %s",
815                         strerror(errno));
816                 goto out;
817         }
818         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
819                 condlog(2, "failed to set monitor socket flags : %s",
820                         strerror(errno));
821                 goto out;
822         }
823         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
824                                                               "disk");
825         if (err)
826                 condlog(2, "failed to create filter : %s", strerror(-err));
827         err = udev_monitor_enable_receiving(monitor);
828         if (err) {
829                 condlog(2, "failed to enable receiving : %s", strerror(-err));
830                 goto out;
831         }
832
833         events = 0;
834         gettimeofday(&start_time, NULL);
835         while (1) {
836                 struct uevent *uev;
837                 struct udev_device *dev;
838                 struct pollfd ev_poll;
839                 int poll_timeout;
840                 int fdcount;
841
842                 memset(&ev_poll, 0, sizeof(struct pollfd));
843                 ev_poll.fd = fd;
844                 ev_poll.events = POLLIN;
845                 poll_timeout = timeout * 1000;
846                 errno = 0;
847                 fdcount = poll(&ev_poll, 1, poll_timeout);
848                 if (fdcount && ev_poll.revents & POLLIN) {
849                         timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
850                         dev = udev_monitor_receive_device(monitor);
851                         if (!dev) {
852                                 condlog(0, "failed getting udev device");
853                                 continue;
854                         }
855                         uev = uevent_from_udev_device(dev);
856                         if (!uev)
857                                 continue;
858                         list_add_tail(&uev->node, &uevlisten_tmp);
859                         events++;
860                         continue;
861                 }
862                 if (fdcount < 0) {
863                         if (errno == EINTR)
864                                 continue;
865
866                         condlog(0, "error receiving "
867                                 "uevent message: %m");
868                         err = -errno;
869                         break;
870                 }
871                 if (!list_empty(&uevlisten_tmp)) {
872                         /*
873                          * Queue uevents and poke service pthread.
874                          */
875                         condlog(3, "Forwarding %d uevents", events);
876                         pthread_mutex_lock(uevq_lockp);
877                         list_splice_tail_init(&uevlisten_tmp, &uevq);
878                         pthread_cond_signal(uev_condp);
879                         pthread_mutex_unlock(uevq_lockp);
880                         events = 0;
881                 }
882                 gettimeofday(&start_time, NULL);
883                 timeout = 30;
884         }
885         need_failback = 0;
886 out:
887         if (monitor)
888                 pthread_cleanup_pop(1);
889         if (need_failback)
890                 err = failback_listen();
891         pthread_cleanup_pop(1);
892         return err;
893 }
894
/* Return the MAJOR value of the uevent, or -1 if it is missing or invalid. */
int uevent_get_major(const struct uevent *uev)
{
	int major = uevent_get_env_positive_int(uev, "MAJOR");

	return major;
}
899
/* Return the MINOR value of the uevent, or -1 if it is missing or invalid. */
int uevent_get_minor(const struct uevent *uev)
{
	int minor = uevent_get_env_positive_int(uev, "MINOR");

	return minor;
}
904
/* Return the DISK_RO value of the uevent, or -1 if it is missing or invalid. */
int uevent_get_disk_ro(const struct uevent *uev)
{
	int disk_ro = uevent_get_env_positive_int(uev, "DISK_RO");

	return disk_ro;
}
909
/*
 * Return a newly allocated copy of the value of environment variable
 * @attr, or NULL if the variable is not present or strdup() fails.
 * The copy comes from strdup(), so releasing it is up to the caller.
 *
 * @attr is only read and always receives string literals, hence const.
 */
static const char *uevent_get_dm_str(const struct uevent *uev, const char *attr)
{
	const char *tmp = uevent_get_env_var(uev, attr);

	if (tmp == NULL)
		return NULL;
	return strdup(tmp);
}
918
/* Return an allocated copy of the uevent's DM_NAME value, or NULL if absent. */
const char *uevent_get_dm_name(const struct uevent *uev)
{
	const char *dm_name = uevent_get_dm_str(uev, "DM_NAME");

	return dm_name;
}
923
/* Return an allocated copy of the uevent's DM_PATH value, or NULL if absent. */
const char *uevent_get_dm_path(const struct uevent *uev)
{
	const char *dm_path = uevent_get_dm_str(uev, "DM_PATH");

	return dm_path;
}
928
/* Return an allocated copy of the uevent's DM_ACTION value, or NULL if absent. */
const char *uevent_get_dm_action(const struct uevent *uev)
{
	const char *dm_action = uevent_get_dm_str(uev, "DM_ACTION");

	return dm_action;
}
933
934 bool uevent_is_mpath(const struct uevent *uev)
935 {
936         const char *uuid = uevent_get_env_var(uev, "DM_UUID");
937
938         if (uuid == NULL)
939                 return false;
940         if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN))
941                 return false;
942         return uuid[UUID_PREFIX_LEN] != '\0';
943 }