Imported Upstream version 0.7.0
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <stdbool.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <poll.h>
38 #include <linux/types.h>
39 #include <linux/netlink.h>
40 #include <pthread.h>
41 #include <sys/mman.h>
42 #include <sys/time.h>
43 #include <libudev.h>
44 #include <errno.h>
45
46 #include "memory.h"
47 #include "debug.h"
48 #include "list.h"
49 #include "uevent.h"
50 #include "vector.h"
51 #include "structs.h"
52 #include "util.h"
53 #include "config.h"
54 #include "blacklist.h"
55
/* uevent burst accumulation limits, used by uevent_burst() */
#define MAX_ACCUMULATION_COUNT 2048
#define MAX_ACCUMULATION_TIME 30*1000
#define MIN_BURST_SPEED 10

/* callback type invoked by the dispatcher for each serviced uevent */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* uevent queue shared between the listener and dispatcher threads */
LIST_HEAD(uevq);
/* uevq_lock protects uevq; uev_cond is signalled when entries are queued */
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;
/* trigger callback and its opaque argument, installed by uevent_dispatch() */
uev_trigger *my_uev_trigger;
void * my_trigger_data;
/* non-zero while the dispatcher is actively handling a batch */
int servicing_uev;
71 int is_uevent_busy(void)
72 {
73         int empty;
74
75         pthread_mutex_lock(uevq_lockp);
76         empty = list_empty(&uevq);
77         pthread_mutex_unlock(uevq_lockp);
78         return (!empty || servicing_uev);
79 }
80
81 struct uevent * alloc_uevent (void)
82 {
83         struct uevent *uev = MALLOC(sizeof(struct uevent));
84
85         if (uev) {
86                 INIT_LIST_HEAD(&uev->node);
87                 INIT_LIST_HEAD(&uev->merge_node);
88         }
89
90         return uev;
91 }
92
/*
 * Drop every uevent on tmpq: unlink it, release its udev device
 * reference (if any) and free the memory.
 */
void
uevq_cleanup(struct list_head *tmpq)
{
	struct uevent *item, *next;

	list_for_each_entry_safe(item, next, tmpq, node) {
		list_del_init(&item->node);
		if (item->udev)
			udev_device_unref(item->udev);
		FREE(item);
	}
}
106
107 void
108 uevent_get_wwid(struct uevent *uev)
109 {
110         int i;
111         char *uid_attribute;
112         struct config * conf;
113
114         conf = get_multipath_config();
115         uid_attribute = parse_uid_attribute_by_attrs(conf->uid_attrs, uev->kernel);
116         put_multipath_config(conf);
117
118         if (!uid_attribute)
119                 return;
120
121         for (i = 0; uev->envp[i] != NULL; i++) {
122                 if (!strncmp(uev->envp[i], uid_attribute, strlen(uid_attribute)) &&
123                     strlen(uev->envp[i]) > strlen(uid_attribute) &&
124                     uev->envp[i][strlen(uid_attribute)] == '=') {
125                         uev->wwid = uev->envp[i] + strlen(uid_attribute) + 1;
126                         break;
127                 }
128         }
129         free(uid_attribute);
130 }
131
132 bool
133 uevent_need_merge(void)
134 {
135         struct config * conf;
136         bool need_merge = false;
137
138         conf = get_multipath_config();
139         if (conf->uid_attrs)
140                 need_merge = true;
141         put_multipath_config(conf);
142
143         return need_merge;
144 }
145
/*
 * Decide from the sysfs devpath alone whether a uevent is uninteresting:
 * anything outside /block/, the bare "/block/" directory itself, or a
 * partition (an extra path element after the disk name).
 */
static bool
uevent_can_discard_by_devpath(const char *devpath)
{
	static const char BLOCK[] = "/block/";
	const char *p = strstr(devpath, BLOCK);

	if (!p) {
		condlog(4, "no /block/ in '%s'", devpath);
		return true;
	}
	p += sizeof(BLOCK) - 1;
	/* just ".../block/" - discard */
	if (*p == '\0')
		return true;
	/*
	 * A further non-trailing '/' means a partition - discard it;
	 * a trailing '/' (".../block/sda/") is still the disk itself.
	 */
	p = strchr(p, '/');
	return p != NULL && p[1] != '\0';
}
167
168 bool
169 uevent_can_discard(struct uevent *uev)
170 {
171         struct config * conf;
172
173         if (uevent_can_discard_by_devpath(uev->devpath))
174                 return true;
175
176         /*
177          * do not filter dm devices by devnode
178          */
179         if (!strncmp(uev->kernel, "dm-", 3))
180                 return false;
181         /*
182          * filter paths devices by devnode
183          */
184         conf = get_multipath_config();
185         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
186                            uev->kernel) > 0) {
187                 put_multipath_config(conf);
188                 return true;
189         }
190         put_multipath_config(conf);
191
192         return false;
193 }
194
195 bool
196 uevent_can_filter(struct uevent *earlier, struct uevent *later)
197 {
198
199         /*
200          * filter earlier uvents if path has removed later. Eg:
201          * "add path1 |chang path1 |add path2 |remove path1"
202          * can filter as:
203          * "add path2 |remove path1"
204          * uevents "add path1" and "chang path1" are filtered out
205          */
206         if (!strcmp(earlier->kernel, later->kernel) &&
207                 !strcmp(later->action, "remove") &&
208                 strncmp(later->kernel, "dm-", 3)) {
209                 return true;
210         }
211
212         /*
213          * filter change uvents if add uevents exist. Eg:
214          * "change path1| add path1 |add path2"
215          * can filter as:
216          * "add path1 |add path2"
217          * uevent "chang path1" is filtered out
218          */
219         if (!strcmp(earlier->kernel, later->kernel) &&
220                 !strcmp(earlier->action, "change") &&
221                 !strcmp(later->action, "add") &&
222                 strncmp(later->kernel, "dm-", 3)) {
223                 return true;
224         }
225
226         return false;
227 }
228
229 bool
230 merge_need_stop(struct uevent *earlier, struct uevent *later)
231 {
232         /*
233          * dm uevent do not try to merge with left uevents
234          */
235         if (!strncmp(later->kernel, "dm-", 3))
236                 return true;
237
238         /*
239          * we can not make a jugement without wwid,
240          * so it is sensible to stop merging
241          */
242         if (!earlier->wwid || !later->wwid)
243                 return true;
244         /*
245          * uevents merging stoped
246          * when we meet an opposite action uevent from the same LUN to AVOID
247          * "add path1 |remove path1 |add path2 |remove path2 |add path3"
248          * to merge as "remove path1, path2" and "add path1, path2, path3"
249          * OR
250          * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
251          * to merge as "add path1, path2" and "remove path1, path2, path3"
252          * SO
253          * when we meet a non-change uevent from the same LUN
254          * with the same wwid and different action
255          * it would be better to stop merging.
256          */
257         if (!strcmp(earlier->wwid, later->wwid) &&
258             strcmp(earlier->action, later->action) &&
259             strcmp(earlier->action, "change") &&
260             strcmp(later->action, "change"))
261                 return true;
262
263         return false;
264 }
265
266 bool
267 uevent_can_merge(struct uevent *earlier, struct uevent *later)
268 {
269         /* merge paths uevents
270          * whose wwids exsit and are same
271          * and actions are same,
272          * and actions are addition or deletion
273          */
274         if (earlier->wwid && later->wwid &&
275             !strcmp(earlier->wwid, later->wwid) &&
276             !strcmp(earlier->action, later->action) &&
277             strncmp(earlier->action, "change", 6) &&
278             strncmp(earlier->kernel, "dm-", 3)) {
279                 return true;
280         }
281
282         return false;
283 }
284
/*
 * First pass over a drained batch: free every discardable uevent, and
 * fetch the wwid of each remaining path (non-dm) event when merging is
 * enabled.
 */
void
uevent_prepare(struct list_head *tmpq)
{
	struct uevent *uev, *next;

	list_for_each_entry_reverse_safe(uev, next, tmpq, node) {
		if (uevent_can_discard(uev)) {
			list_del_init(&uev->node);
			if (uev->udev)
				udev_device_unref(uev->udev);
			FREE(uev);
			continue;
		}

		if (strncmp(uev->kernel, "dm-", 3) && uevent_need_merge())
			uevent_get_wwid(uev);
	}
}
304
/*
 * Scan backwards from 'later' and free every earlier uevent that it
 * makes redundant (see uevent_can_filter()).
 */
void
uevent_filter(struct uevent *later, struct list_head *tmpq)
{
	struct uevent *earlier, *next;

	list_for_some_entry_reverse_safe(earlier, next, &later->node, tmpq, node) {
		if (!uevent_can_filter(earlier, later))
			continue;

		condlog(2, "uevent: %s-%s has filtered by uevent: %s-%s",
			earlier->kernel, earlier->action,
			later->kernel, later->action);

		list_del_init(&earlier->node);
		if (earlier->udev)
			udev_device_unref(earlier->udev);
		FREE(earlier);
	}
}
327
328 void
329 uevent_merge(struct uevent *later, struct list_head *tmpq)
330 {
331         struct uevent *earlier, *tmp;
332
333         list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
334                 if (merge_need_stop(earlier, later))
335                         break;
336                 /*
337                  * merge earlier uevents to the later uevent
338                  */
339                 if (uevent_can_merge(earlier, later)) {
340                         condlog(2, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
341                                 earlier->action, earlier->kernel, earlier->wwid,
342                                 later->action, later->kernel, later->wwid);
343
344                         list_move(&earlier->node, &later->merge_node);
345                 }
346         }
347 }
348
/*
 * Prepare, filter and merge a just-drained batch of uevents in place.
 * Iterates newest-to-oldest so each event can invalidate or absorb the
 * ones that preceded it; merging is only attempted when uid_attrs is
 * configured (uevent_need_merge()).
 */
void
merge_uevq(struct list_head *tmpq)
{
	struct uevent *later;

	uevent_prepare(tmpq);
	list_for_each_entry_reverse(later, tmpq, node) {
		uevent_filter(later, tmpq);
		if(uevent_need_merge())
			uevent_merge(later, tmpq);
	}
}
361
/*
 * Hand each uevent of a drained batch to the installed trigger, then
 * free it together with any uevents that were merged into it.
 */
void
service_uevq(struct list_head *tmpq)
{
	struct uevent *uev, *next;

	list_for_each_entry_safe(uev, next, tmpq, node) {
		list_del_init(&uev->node);

		if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
			condlog(0, "uevent trigger error");

		/* release events that were merged onto this one */
		uevq_cleanup(&uev->merge_node);

		if (uev->udev)
			udev_device_unref(uev->udev);
		FREE(uev);
	}
}
380
/*
 * pthread cleanup handler: drops the udev reference taken by
 * uevent_listen() when the listener thread is cancelled or exits.
 */
static void uevent_cleanup(void *arg)
{
	struct udev *udev = arg;

	condlog(3, "Releasing uevent_listen() resources");
	udev_unref(udev);
}
388
/*
 * Service the uevent queue.
 *
 * Installs uev_trigger/trigger_data and then loops forever: waits (on
 * uev_cond) for events queued by the listener thread, drains the shared
 * uevq under uevq_lock into a private list, merges/filters that batch
 * and hands each remaining event to the trigger.  servicing_uev is
 * raised while a batch is being handled so is_uevent_busy() stays
 * accurate.
 *
 * The loop only exits when my_uev_trigger is NULL; returns 0 after
 * cleaning up any remaining queued events.
 */
int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
		    void * trigger_data)
{
	my_uev_trigger = uev_trigger;
	my_trigger_data = trigger_data;

	/* avoid page faults stalling event servicing */
	mlockall(MCL_CURRENT | MCL_FUTURE);

	while (1) {
		LIST_HEAD(uevq_tmp);

		pthread_mutex_lock(uevq_lockp);
		servicing_uev = 0;
		/*
		 * Condition signals are unreliable,
		 * so make sure we only wait if we have to.
		 */
		if (list_empty(&uevq)) {
			pthread_cond_wait(uev_condp, uevq_lockp);
		}
		servicing_uev = 1;
		list_splice_init(&uevq, &uevq_tmp);
		pthread_mutex_unlock(uevq_lockp);
		if (!my_uev_trigger)
			break;
		merge_uevq(&uevq_tmp);
		service_uevq(&uevq_tmp);
	}
	condlog(3, "Terminating uev service queue");
	uevq_cleanup(&uevq);
	return 0;
}
424
425 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
426 {
427         struct uevent *uev;
428         char *buffer;
429         size_t bufpos;
430         int i;
431         char *pos;
432
433         uev = alloc_uevent();
434         if (!uev) {
435                 condlog(1, "lost uevent, oom");
436                 return NULL;
437         }
438
439         if ((size_t)buflen > sizeof(buf)-1)
440                 buflen = sizeof(buf)-1;
441
442         /*
443          * Copy the shared receive buffer contents to buffer private
444          * to this uevent so we can immediately reuse the shared buffer.
445          */
446         memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
447         buffer = uev->buffer;
448         buffer[buflen] = '\0';
449
450         /* save start of payload */
451         bufpos = strlen(buffer) + 1;
452
453         /* action string */
454         uev->action = buffer;
455         pos = strchr(buffer, '@');
456         if (!pos) {
457                 condlog(3, "bad action string '%s'", buffer);
458                 FREE(uev);
459                 return NULL;
460         }
461         pos[0] = '\0';
462
463         /* sysfs path */
464         uev->devpath = &pos[1];
465
466         /* hotplug events have the environment attached - reconstruct envp[] */
467         for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
468                 int keylen;
469                 char *key;
470
471                 key = &buffer[bufpos];
472                 keylen = strlen(key);
473                 uev->envp[i] = key;
474                 /* Filter out sequence number */
475                 if (strncmp(key, "SEQNUM=", 7) == 0) {
476                         char *eptr;
477
478                         uev->seqnum = strtoul(key + 7, &eptr, 10);
479                         if (eptr == key + 7)
480                                 uev->seqnum = -1;
481                 }
482                 bufpos += keylen + 1;
483         }
484         uev->envp[i] = NULL;
485
486         condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
487                 uev->action, uev->devpath);
488         uev->kernel = strrchr(uev->devpath, '/');
489         if (uev->kernel)
490                 uev->kernel++;
491
492         /* print payload environment */
493         for (i = 0; uev->envp[i] != NULL; i++)
494                 condlog(5, "%s", uev->envp[i]);
495
496         return uev;
497 }
498
499 int failback_listen(void)
500 {
501         int sock;
502         struct sockaddr_nl snl;
503         struct sockaddr_un sun;
504         socklen_t addrlen;
505         int retval;
506         int rcvbufsz = 128*1024;
507         int rcvsz = 0;
508         int rcvszsz = sizeof(rcvsz);
509         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
510         const int feature_on = 1;
511         /*
512          * First check whether we have a udev socket
513          */
514         memset(&sun, 0x00, sizeof(struct sockaddr_un));
515         sun.sun_family = AF_LOCAL;
516         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
517         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
518
519         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
520         if (sock >= 0) {
521
522                 condlog(3, "reading events from udev socket.");
523
524                 /* the bind takes care of ensuring only one copy running */
525                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
526                 if (retval < 0) {
527                         condlog(0, "bind failed, exit");
528                         goto exit;
529                 }
530
531                 /* enable receiving of the sender credentials */
532                 retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
533                                     &feature_on, sizeof(feature_on));
534                 if (retval < 0) {
535                         condlog(0, "failed to enable credential passing, exit");
536                         goto exit;
537                 }
538
539         } else {
540                 /* Fallback to read kernel netlink events */
541                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
542                 snl.nl_family = AF_NETLINK;
543                 snl.nl_pid = getpid();
544                 snl.nl_groups = 0x01;
545
546                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
547                 if (sock == -1) {
548                         condlog(0, "error getting socket, exit");
549                         return 1;
550                 }
551
552                 condlog(3, "reading events from kernel.");
553
554                 /*
555                  * try to avoid dropping uevents, even so, this is not a guarantee,
556                  * but it does help to change the netlink uevent socket's
557                  * receive buffer threshold from the default value of 106,496 to
558                  * the maximum value of 262,142.
559                  */
560                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
561                                     sizeof(rcvbufsz));
562
563                 if (retval < 0) {
564                         condlog(0, "error setting receive buffer size for socket, exit");
565                         exit(1);
566                 }
567                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
568                 if (retval < 0) {
569                         condlog(0, "error setting receive buffer size for socket, exit");
570                         exit(1);
571                 }
572                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
573
574                 /* enable receiving of the sender credentials */
575                 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
576                                &feature_on, sizeof(feature_on)) < 0) {
577                         condlog(0, "error on enabling credential passing for socket");
578                         exit(1);
579                 }
580
581                 retval = bind(sock, (struct sockaddr *) &snl,
582                               sizeof(struct sockaddr_nl));
583                 if (retval < 0) {
584                         condlog(0, "bind failed, exit");
585                         goto exit;
586                 }
587         }
588
589         while (1) {
590                 size_t bufpos;
591                 ssize_t buflen;
592                 struct uevent *uev;
593                 struct msghdr smsg;
594                 struct iovec iov;
595                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
596                 struct cmsghdr *cmsg;
597                 struct ucred *cred;
598                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
599
600                 memset(buf, 0x00, sizeof(buf));
601                 iov.iov_base = &buf;
602                 iov.iov_len = sizeof(buf);
603                 memset (&smsg, 0x00, sizeof(struct msghdr));
604                 smsg.msg_iov = &iov;
605                 smsg.msg_iovlen = 1;
606                 smsg.msg_control = cred_msg;
607                 smsg.msg_controllen = sizeof(cred_msg);
608
609                 buflen = recvmsg(sock, &smsg, 0);
610                 if (buflen < 0) {
611                         if (errno != EINTR)
612                                 condlog(0, "error receiving message, errno %d", errno);
613                         continue;
614                 }
615
616                 cmsg = CMSG_FIRSTHDR(&smsg);
617                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
618                         condlog(3, "no sender credentials received, message ignored");
619                         continue;
620                 }
621
622                 cred = (struct ucred *)CMSG_DATA(cmsg);
623                 if (cred->uid != 0) {
624                         condlog(3, "sender uid=%d, message ignored", cred->uid);
625                         continue;
626                 }
627
628                 /* skip header */
629                 bufpos = strlen(buf) + 1;
630                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
631                         condlog(3, "invalid message length");
632                         continue;
633                 }
634
635                 /* check message header */
636                 if (strstr(buf, "@/") == NULL) {
637                         condlog(3, "unrecognized message header");
638                         continue;
639                 }
640                 if ((size_t)buflen > sizeof(buf)-1) {
641                         condlog(2, "buffer overflow for received uevent");
642                         buflen = sizeof(buf)-1;
643                 }
644
645                 uev = uevent_from_buffer(buf, buflen);
646                 if (!uev)
647                         continue;
648                 /*
649                  * Queue uevent and poke service pthread.
650                  */
651                 pthread_mutex_lock(uevq_lockp);
652                 list_add_tail(&uev->node, &uevq);
653                 pthread_cond_signal(uev_condp);
654                 pthread_mutex_unlock(uevq_lockp);
655         }
656
657 exit:
658         close(sock);
659         return 1;
660 }
661
/*
 * Build a struct uevent from a libudev device, serializing its property
 * list as "KEY=value" strings into uev->buffer and pointing envp[] at
 * them.  Takes ownership of 'dev': the reference is stored in uev->udev
 * (or dropped on allocation failure).
 *
 * Returns NULL only when the uevent allocation fails.
 *
 * NOTE(review): if the device carries no DEVPATH/ACTION property,
 * uev->devpath / uev->action stay NULL and the strrchr() / condlog()
 * below would operate on NULL - presumably udev always supplies both;
 * confirm before relying on this with synthetic devices.
 */
struct uevent *uevent_from_udev_device(struct udev_device *dev)
{
	struct uevent *uev;
	int i = 0;
	char *pos, *end;
	struct udev_list_entry *list_entry;

	uev = alloc_uevent();
	if (!uev) {
		udev_device_unref(dev);
		condlog(1, "lost uevent, oom");
		return NULL;
	}
	pos = uev->buffer;
	end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
	udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
		const char *name, *value;
		int bytes;

		name = udev_list_entry_get_name(list_entry);
		if (!name)
			name = "(null)";
		value = udev_list_entry_get_value(list_entry);
		if (!value)
			value = "(null)";
		/* bytes is the untruncated length snprintf wanted to write */
		bytes = snprintf(pos, end - pos, "%s=%s", name, value);
		if (pos + bytes >= end) {
			condlog(2, "buffer overflow for uevent");
			break;
		}
		uev->envp[i] = pos;
		pos += bytes;
		*pos = '\0';
		pos++;
		/* skip the "DEVPATH=" (8) / "ACTION=" (7) prefixes */
		if (strcmp(name, "DEVPATH") == 0)
			uev->devpath = uev->envp[i] + 8;
		if (strcmp(name, "ACTION") == 0)
			uev->action = uev->envp[i] + 7;
		i++;
		if (i == HOTPLUG_NUM_ENVP - 1)
			break;
	}
	uev->udev = dev;
	uev->envp[i] = NULL;

	condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
	uev->kernel = strrchr(uev->devpath, '/');
	if (uev->kernel)
		uev->kernel++;

	/* print payload environment */
	for (i = 0; uev->envp[i] != NULL; i++)
		condlog(5, "%s", uev->envp[i]);
	return uev;
}
717
718 bool uevent_burst(struct timeval *start_time, int events)
719 {
720         struct timeval diff_time, end_time;
721         unsigned long speed;
722         unsigned long eclipse_ms;
723
724         if(events > MAX_ACCUMULATION_COUNT) {
725                 condlog(2, "burst got %u uevents, too much uevents, stopped", events);
726                 return false;
727         }
728
729         gettimeofday(&end_time, NULL);
730         timersub(&end_time, start_time, &diff_time);
731
732         eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
733
734         if (eclipse_ms == 0)
735                 return true;
736
737         if (eclipse_ms > MAX_ACCUMULATION_TIME) {
738                 condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
739                 return false;
740         }
741
742         speed = (events * 1000) / eclipse_ms;
743         if (speed > MIN_BURST_SPEED)
744                 return true;
745
746         return false;
747 }
748
/*
 * Listener thread entry point: receive block-subsystem uevents via a
 * libudev monitor and queue them for uevent_dispatch().
 *
 * Events are accumulated locally while they arrive in a burst (see
 * uevent_burst(); poll timeout drops to 1s/0s during a burst) and are
 * spliced onto the shared uevq in one batch when the burst ends, so
 * this thread never blocks on multipathd's heavier locks and can keep
 * draining the socket.
 *
 * Takes its own reference on 'udev', released by the pthread cleanup
 * handler.  If the monitor cannot be set up, falls back to
 * failback_listen().  Returns non-zero on failure; the normal receive
 * loop only ends on a poll error.
 */
int uevent_listen(struct udev *udev)
{
	int err = 2;
	struct udev_monitor *monitor = NULL;
	int fd, socket_flags, events;
	struct timeval start_time;
	int need_failback = 1;
	int timeout = 30;
	LIST_HEAD(uevlisten_tmp);

	/*
	 * Queue uevents for service by dedicated thread so that the uevent
	 * listening thread does not block on multipathd locks (vecs->lock)
	 * thereby not getting to empty the socket's receive buffer queue
	 * often enough.
	 */
	if (!udev) {
		condlog(1, "no udev context");
		return 1;
	}
	udev_ref(udev);
	pthread_cleanup_push(uevent_cleanup, udev);

	monitor = udev_monitor_new_from_netlink(udev, "udev");
	if (!monitor) {
		condlog(2, "failed to create udev monitor");
		goto out;
	}
#ifdef LIBUDEV_API_RECVBUF
	if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
		condlog(2, "failed to increase buffer size");
#endif
	fd = udev_monitor_get_fd(monitor);
	if (fd < 0) {
		condlog(2, "failed to get monitor fd");
		goto out;
	}
	/* switch the monitor fd to blocking mode for the poll/receive loop */
	socket_flags = fcntl(fd, F_GETFL);
	if (socket_flags < 0) {
		condlog(2, "failed to get monitor socket flags : %s",
			strerror(errno));
		goto out;
	}
	if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
		condlog(2, "failed to set monitor socket flags : %s",
			strerror(errno));
		goto out;
	}
	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
							      NULL);
	if (err)
		condlog(2, "failed to create filter : %s", strerror(-err));
	err = udev_monitor_enable_receiving(monitor);
	if (err) {
		condlog(2, "failed to enable receiving : %s", strerror(-err));
		goto out;
	}

	events = 0;
	gettimeofday(&start_time, NULL);
	while (1) {
		struct uevent *uev;
		struct udev_device *dev;
		struct pollfd ev_poll;
		int poll_timeout;
		int fdcount;

		memset(&ev_poll, 0, sizeof(struct pollfd));
		ev_poll.fd = fd;
		ev_poll.events = POLLIN;
		poll_timeout = timeout * 1000;
		errno = 0;
		fdcount = poll(&ev_poll, 1, poll_timeout);
		if (fdcount && ev_poll.revents & POLLIN) {
			/* during a burst keep a short timeout and go on
			 * accumulating instead of forwarding right away */
			timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
			dev = udev_monitor_receive_device(monitor);
			if (!dev) {
				condlog(0, "failed getting udev device");
				continue;
			}
			uev = uevent_from_udev_device(dev);
			if (!uev)
				continue;
			list_add_tail(&uev->node, &uevlisten_tmp);
			events++;
			continue;
		}
		if (fdcount < 0) {
			if (errno == EINTR)
				continue;

			condlog(0, "error receiving "
				"uevent message: %m");
			err = -errno;
			break;
		}
		/* poll timed out: the burst is over, forward the batch */
		if (!list_empty(&uevlisten_tmp)) {
			/*
			 * Queue uevents and poke service pthread.
			 */
			condlog(3, "Forwarding %d uevents", events);
			pthread_mutex_lock(uevq_lockp);
			list_splice_tail_init(&uevlisten_tmp, &uevq);
			pthread_cond_signal(uev_condp);
			pthread_mutex_unlock(uevq_lockp);
			events = 0;
		}
		gettimeofday(&start_time, NULL);
		timeout = 30;
	}
	need_failback = 0;
out:
	if (monitor)
		udev_monitor_unref(monitor);
	if (need_failback)
		err = failback_listen();
	pthread_cleanup_pop(1);
	return err;
}
868
869 int uevent_get_major(struct uevent *uev)
870 {
871         char *p, *q;
872         int i, major = -1;
873
874         for (i = 0; uev->envp[i] != NULL; i++) {
875                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
876                         p = uev->envp[i] + 6;
877                         major = strtoul(p, &q, 10);
878                         if (p == q) {
879                                 condlog(2, "invalid major '%s'", p);
880                                 major = -1;
881                         }
882                         break;
883                 }
884         }
885         return major;
886 }
887
888 int uevent_get_minor(struct uevent *uev)
889 {
890         char *p, *q;
891         int i, minor = -1;
892
893         for (i = 0; uev->envp[i] != NULL; i++) {
894                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
895                         p = uev->envp[i] + 6;
896                         minor = strtoul(p, &q, 10);
897                         if (p == q) {
898                                 condlog(2, "invalid minor '%s'", p);
899                                 minor = -1;
900                         }
901                         break;
902                 }
903         }
904         return minor;
905 }
906
907 int uevent_get_disk_ro(struct uevent *uev)
908 {
909         char *p, *q;
910         int i, ro = -1;
911
912         for (i = 0; uev->envp[i] != NULL; i++) {
913                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
914                         p = uev->envp[i] + 8;
915                         ro = strtoul(p, &q, 10);
916                         if (p == q) {
917                                 condlog(2, "invalid read_only setting '%s'", p);
918                                 ro = -1;
919                         }
920                         break;
921                 }
922         }
923         return ro;
924 }
925
926 char *uevent_get_dm_name(struct uevent *uev)
927 {
928         char *p = NULL;
929         int i;
930
931         for (i = 0; uev->envp[i] != NULL; i++) {
932                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
933                     strlen(uev->envp[i]) > 7) {
934                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
935                         strcpy(p, uev->envp[i] + 8);
936                         break;
937                 }
938         }
939         return p;
940 }