Imported Upstream version 0.6.1
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <sys/poll.h>
38 #include <linux/types.h>
39 #include <linux/netlink.h>
40 #include <pthread.h>
41 #include <signal.h>
42 #include <limits.h>
43 #include <sys/mman.h>
44 #include <libudev.h>
45 #include <errno.h>
46
47 #include "memory.h"
48 #include "debug.h"
49 #include "list.h"
50 #include "uevent.h"
51 #include "vector.h"
52
/* Signature of the callback run by the dispatcher for each queued uevent. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/*
 * Queue of uevents waiting to be serviced, together with the mutex and
 * condition variable used to hand events from the listener to the
 * dispatcher. The *p aliases are what the code in this file locks/signals.
 */
LIST_HEAD(uevq);
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;

/* Not referenced in this file; presumably the dispatcher thread id - confirm with callers. */
pthread_t uevq_thr;

/* Callback and opaque argument registered by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void * my_trigger_data;

/* Non-zero while uevent_dispatch() is processing a batch of events. */
int servicing_uev;
64
65 int is_uevent_busy(void)
66 {
67         int empty;
68
69         pthread_mutex_lock(uevq_lockp);
70         empty = list_empty(&uevq);
71         pthread_mutex_unlock(uevq_lockp);
72         return (!empty || servicing_uev);
73 }
74
75 struct uevent * alloc_uevent (void)
76 {
77         struct uevent *uev = MALLOC(sizeof(struct uevent));
78
79         if (uev)
80                 INIT_LIST_HEAD(&uev->node);
81
82         return uev;
83 }
84
/*
 * Initialise a thread attribute object.
 *
 * attr:      attribute object to initialise
 * stacksize: requested stack size; raised to PTHREAD_STACK_MIN if smaller
 * detached:  when non-zero, the attribute is set to PTHREAD_CREATE_DETACHED
 *
 * Any failure is reported on stderr and terminates the process with
 * exit(1).
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
        int rc;

        /*
         * The pthread_attr_* functions return the error number directly
         * and do not set errno, so the message must be built from the
         * return value.
         */
        rc = pthread_attr_init(attr);
        if (rc) {
                fprintf(stderr, "can't initialize thread attr: %s\n",
                        strerror(rc));
                exit(1);
        }
        if (stacksize < (size_t)PTHREAD_STACK_MIN)
                stacksize = PTHREAD_STACK_MIN;

        rc = pthread_attr_setstacksize(attr, stacksize);
        if (rc) {
                fprintf(stderr, "can't set thread stack size to %lu: %s\n",
                        (unsigned long)stacksize, strerror(rc));
                exit(1);
        }
        if (detached) {
                rc = pthread_attr_setdetachstate(attr,
                                                 PTHREAD_CREATE_DETACHED);
                if (rc) {
                        fprintf(stderr, "can't set thread to detached: %s\n",
                                strerror(rc));
                        exit(1);
                }
        }
}
108
109 /*
110  * Called with uevq_lockp held
111  */
112 void
113 service_uevq(struct list_head *tmpq)
114 {
115         struct uevent *uev, *tmp;
116
117         list_for_each_entry_safe(uev, tmp, tmpq, node) {
118                 list_del_init(&uev->node);
119
120                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
121                         condlog(0, "uevent trigger error");
122
123                 if (uev->udev)
124                         udev_device_unref(uev->udev);
125                 FREE(uev);
126         }
127 }
128
129 static void uevq_stop(void *arg)
130 {
131         struct udev *udev = arg;
132
133         condlog(3, "Stopping uev queue");
134         pthread_mutex_lock(uevq_lockp);
135         my_uev_trigger = NULL;
136         pthread_cond_signal(uev_condp);
137         pthread_mutex_unlock(uevq_lockp);
138         udev_unref(udev);
139 }
140
141 void
142 uevq_cleanup(struct list_head *tmpq)
143 {
144         struct uevent *uev, *tmp;
145
146         list_for_each_entry_safe(uev, tmp, tmpq, node) {
147                 list_del_init(&uev->node);
148                 FREE(uev);
149         }
150 }
151
152 /*
153  * Service the uevent queue.
154  */
155 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
156                     void * trigger_data)
157 {
158         my_uev_trigger = uev_trigger;
159         my_trigger_data = trigger_data;
160
161         mlockall(MCL_CURRENT | MCL_FUTURE);
162
163         while (1) {
164                 LIST_HEAD(uevq_tmp);
165
166                 pthread_mutex_lock(uevq_lockp);
167                 servicing_uev = 0;
168                 /*
169                  * Condition signals are unreliable,
170                  * so make sure we only wait if we have to.
171                  */
172                 if (list_empty(&uevq)) {
173                         pthread_cond_wait(uev_condp, uevq_lockp);
174                 }
175                 servicing_uev = 1;
176                 list_splice_init(&uevq, &uevq_tmp);
177                 pthread_mutex_unlock(uevq_lockp);
178                 if (!my_uev_trigger)
179                         break;
180                 service_uevq(&uevq_tmp);
181         }
182         condlog(3, "Terminating uev service queue");
183         uevq_cleanup(&uevq);
184         return 0;
185 }
186
187 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
188 {
189         struct uevent *uev;
190         char *buffer;
191         size_t bufpos;
192         int i;
193         char *pos;
194
195         uev = alloc_uevent();
196         if (!uev) {
197                 condlog(1, "lost uevent, oom");
198                 return NULL;
199         }
200
201         if ((size_t)buflen > sizeof(buf)-1)
202                 buflen = sizeof(buf)-1;
203
204         /*
205          * Copy the shared receive buffer contents to buffer private
206          * to this uevent so we can immediately reuse the shared buffer.
207          */
208         memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
209         buffer = uev->buffer;
210         buffer[buflen] = '\0';
211
212         /* save start of payload */
213         bufpos = strlen(buffer) + 1;
214
215         /* action string */
216         uev->action = buffer;
217         pos = strchr(buffer, '@');
218         if (!pos) {
219                 condlog(3, "bad action string '%s'", buffer);
220                 FREE(uev);
221                 return NULL;
222         }
223         pos[0] = '\0';
224
225         /* sysfs path */
226         uev->devpath = &pos[1];
227
228         /* hotplug events have the environment attached - reconstruct envp[] */
229         for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
230                 int keylen;
231                 char *key;
232
233                 key = &buffer[bufpos];
234                 keylen = strlen(key);
235                 uev->envp[i] = key;
236                 /* Filter out sequence number */
237                 if (strncmp(key, "SEQNUM=", 7) == 0) {
238                         char *eptr;
239
240                         uev->seqnum = strtoul(key + 7, &eptr, 10);
241                         if (eptr == key + 7)
242                                 uev->seqnum = -1;
243                 }
244                 bufpos += keylen + 1;
245         }
246         uev->envp[i] = NULL;
247
248         condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
249                 uev->action, uev->devpath);
250         uev->kernel = strrchr(uev->devpath, '/');
251         if (uev->kernel)
252                 uev->kernel++;
253
254         /* print payload environment */
255         for (i = 0; uev->envp[i] != NULL; i++)
256                 condlog(5, "%s", uev->envp[i]);
257
258         return uev;
259 }
260
261 int failback_listen(void)
262 {
263         int sock;
264         struct sockaddr_nl snl;
265         struct sockaddr_un sun;
266         socklen_t addrlen;
267         int retval;
268         int rcvbufsz = 128*1024;
269         int rcvsz = 0;
270         int rcvszsz = sizeof(rcvsz);
271         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
272         const int feature_on = 1;
273         /*
274          * First check whether we have a udev socket
275          */
276         memset(&sun, 0x00, sizeof(struct sockaddr_un));
277         sun.sun_family = AF_LOCAL;
278         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
279         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
280
281         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
282         if (sock >= 0) {
283
284                 condlog(3, "reading events from udev socket.");
285
286                 /* the bind takes care of ensuring only one copy running */
287                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
288                 if (retval < 0) {
289                         condlog(0, "bind failed, exit");
290                         goto exit;
291                 }
292
293                 /* enable receiving of the sender credentials */
294                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
295                            &feature_on, sizeof(feature_on));
296
297         } else {
298                 /* Fallback to read kernel netlink events */
299                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
300                 snl.nl_family = AF_NETLINK;
301                 snl.nl_pid = getpid();
302                 snl.nl_groups = 0x01;
303
304                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
305                 if (sock == -1) {
306                         condlog(0, "error getting socket, exit");
307                         return 1;
308                 }
309
310                 condlog(3, "reading events from kernel.");
311
312                 /*
313                  * try to avoid dropping uevents, even so, this is not a guarantee,
314                  * but it does help to change the netlink uevent socket's
315                  * receive buffer threshold from the default value of 106,496 to
316                  * the maximum value of 262,142.
317                  */
318                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
319                                     sizeof(rcvbufsz));
320
321                 if (retval < 0) {
322                         condlog(0, "error setting receive buffer size for socket, exit");
323                         exit(1);
324                 }
325                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
326                 if (retval < 0) {
327                         condlog(0, "error setting receive buffer size for socket, exit");
328                         exit(1);
329                 }
330                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
331
332                 /* enable receiving of the sender credentials */
333                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
334                            &feature_on, sizeof(feature_on));
335
336                 retval = bind(sock, (struct sockaddr *) &snl,
337                               sizeof(struct sockaddr_nl));
338                 if (retval < 0) {
339                         condlog(0, "bind failed, exit");
340                         goto exit;
341                 }
342         }
343
344         while (1) {
345                 size_t bufpos;
346                 ssize_t buflen;
347                 struct uevent *uev;
348                 struct msghdr smsg;
349                 struct iovec iov;
350                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
351                 struct cmsghdr *cmsg;
352                 struct ucred *cred;
353                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
354
355                 memset(buf, 0x00, sizeof(buf));
356                 iov.iov_base = &buf;
357                 iov.iov_len = sizeof(buf);
358                 memset (&smsg, 0x00, sizeof(struct msghdr));
359                 smsg.msg_iov = &iov;
360                 smsg.msg_iovlen = 1;
361                 smsg.msg_control = cred_msg;
362                 smsg.msg_controllen = sizeof(cred_msg);
363
364                 buflen = recvmsg(sock, &smsg, 0);
365                 if (buflen < 0) {
366                         if (errno != EINTR)
367                                 condlog(0, "error receiving message, errno %d", errno);
368                         continue;
369                 }
370
371                 cmsg = CMSG_FIRSTHDR(&smsg);
372                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
373                         condlog(3, "no sender credentials received, message ignored");
374                         continue;
375                 }
376
377                 cred = (struct ucred *)CMSG_DATA(cmsg);
378                 if (cred->uid != 0) {
379                         condlog(3, "sender uid=%d, message ignored", cred->uid);
380                         continue;
381                 }
382
383                 /* skip header */
384                 bufpos = strlen(buf) + 1;
385                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
386                         condlog(3, "invalid message length");
387                         continue;
388                 }
389
390                 /* check message header */
391                 if (strstr(buf, "@/") == NULL) {
392                         condlog(3, "unrecognized message header");
393                         continue;
394                 }
395                 if ((size_t)buflen > sizeof(buf)-1) {
396                         condlog(2, "buffer overflow for received uevent");
397                         buflen = sizeof(buf)-1;
398                 }
399
400                 uev = uevent_from_buffer(buf, buflen);
401                 if (!uev)
402                         continue;
403                 /*
404                  * Queue uevent and poke service pthread.
405                  */
406                 pthread_mutex_lock(uevq_lockp);
407                 list_add_tail(&uev->node, &uevq);
408                 pthread_cond_signal(uev_condp);
409                 pthread_mutex_unlock(uevq_lockp);
410         }
411
412 exit:
413         close(sock);
414         return 1;
415 }
416
417 struct uevent *uevent_from_udev_device(struct udev_device *dev)
418 {
419         struct uevent *uev;
420         int i = 0;
421         char *pos, *end;
422         struct udev_list_entry *list_entry;
423
424         uev = alloc_uevent();
425         if (!uev) {
426                 udev_device_unref(dev);
427                 condlog(1, "lost uevent, oom");
428                 return NULL;
429         }
430         pos = uev->buffer;
431         end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
432         udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
433                 const char *name, *value;
434                 int bytes;
435
436                 name = udev_list_entry_get_name(list_entry);
437                 if (!name)
438                         name = "(null)";
439                 value = udev_list_entry_get_value(list_entry);
440                 if (!value)
441                         value = "(null)";
442                 bytes = snprintf(pos, end - pos, "%s=%s", name, value);
443                 if (pos + bytes >= end) {
444                         condlog(2, "buffer overflow for uevent");
445                         break;
446                 }
447                 uev->envp[i] = pos;
448                 pos += bytes;
449                 *pos = '\0';
450                 pos++;
451                 if (strcmp(name, "DEVPATH") == 0)
452                         uev->devpath = uev->envp[i] + 8;
453                 if (strcmp(name, "ACTION") == 0)
454                         uev->action = uev->envp[i] + 7;
455                 i++;
456                 if (i == HOTPLUG_NUM_ENVP - 1)
457                         break;
458         }
459         uev->udev = dev;
460         uev->envp[i] = NULL;
461
462         condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
463         uev->kernel = strrchr(uev->devpath, '/');
464         if (uev->kernel)
465                 uev->kernel++;
466
467         /* print payload environment */
468         for (i = 0; uev->envp[i] != NULL; i++)
469                 condlog(5, "%s", uev->envp[i]);
470         return uev;
471 }
472
473 int uevent_listen(struct udev *udev)
474 {
475         int err = 2;
476         struct udev_monitor *monitor = NULL;
477         int fd, fd_ep = -1, socket_flags, events;
478         int need_failback = 1;
479         int timeout = 30;
480         sigset_t mask;
481         LIST_HEAD(uevlisten_tmp);
482
483         /*
484          * Queue uevents for service by dedicated thread so that the uevent
485          * listening thread does not block on multipathd locks (vecs->lock)
486          * thereby not getting to empty the socket's receive buffer queue
487          * often enough.
488          */
489         if (!udev) {
490                 condlog(1, "no udev context");
491                 return 1;
492         }
493         udev_ref(udev);
494         pthread_cleanup_push(uevq_stop, udev);
495
496         monitor = udev_monitor_new_from_netlink(udev, "udev");
497         if (!monitor) {
498                 condlog(2, "failed to create udev monitor");
499                 goto out;
500         }
501 #ifdef LIBUDEV_API_RECVBUF
502         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
503                 condlog(2, "failed to increase buffer size");
504 #endif
505         fd = udev_monitor_get_fd(monitor);
506         if (fd < 0) {
507                 condlog(2, "failed to get monitor fd");
508                 goto out;
509         }
510         socket_flags = fcntl(fd, F_GETFL);
511         if (socket_flags < 0) {
512                 condlog(2, "failed to get monitor socket flags : %s",
513                         strerror(errno));
514                 goto out;
515         }
516         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
517                 condlog(2, "failed to set monitor socket flags : %s",
518                         strerror(errno));
519                 goto out;
520         }
521         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
522                                                               NULL);
523         if (err)
524                 condlog(2, "failed to create filter : %s", strerror(-err));
525         err = udev_monitor_enable_receiving(monitor);
526         if (err) {
527                 condlog(2, "failed to enable receiving : %s", strerror(-err));
528                 goto out;
529         }
530
531         pthread_sigmask(SIG_SETMASK, NULL, &mask);
532         events = 0;
533         while (1) {
534                 struct uevent *uev;
535                 struct udev_device *dev;
536                 struct pollfd ev_poll;
537                 struct timespec poll_timeout;
538                 int fdcount;
539
540                 memset(&ev_poll, 0, sizeof(struct pollfd));
541                 ev_poll.fd = fd;
542                 ev_poll.events = POLLIN;
543                 memset(&poll_timeout, 0, sizeof(struct timespec));
544                 poll_timeout.tv_sec = timeout;
545                 errno = 0;
546                 fdcount = ppoll(&ev_poll, 1, &poll_timeout, &mask);
547                 if (fdcount && ev_poll.revents & POLLIN) {
548                         timeout = 0;
549                         dev = udev_monitor_receive_device(monitor);
550                         if (!dev) {
551                                 condlog(0, "failed getting udev device");
552                                 continue;
553                         }
554                         uev = uevent_from_udev_device(dev);
555                         if (!uev)
556                                 continue;
557                         list_add_tail(&uev->node, &uevlisten_tmp);
558                         events++;
559                         continue;
560                 }
561                 if (fdcount < 0) {
562                         if (errno == EINTR)
563                                 continue;
564
565                         condlog(0, "error receiving "
566                                 "uevent message: %m");
567                         err = -errno;
568                         break;
569                 }
570                 if (!list_empty(&uevlisten_tmp)) {
571                         /*
572                          * Queue uevents and poke service pthread.
573                          */
574                         condlog(3, "Forwarding %d uevents", events);
575                         pthread_mutex_lock(uevq_lockp);
576                         list_splice_tail_init(&uevlisten_tmp, &uevq);
577                         pthread_cond_signal(uev_condp);
578                         pthread_mutex_unlock(uevq_lockp);
579                         events = 0;
580                 }
581                 timeout = 30;
582         }
583         need_failback = 0;
584 out:
585         if (fd_ep >= 0)
586                 close(fd_ep);
587         if (monitor)
588                 udev_monitor_unref(monitor);
589         if (need_failback)
590                 err = failback_listen();
591         pthread_cleanup_pop(1);
592         return err;
593 }
594
595 extern int
596 uevent_get_major(struct uevent *uev)
597 {
598         char *p, *q;
599         int i, major = -1;
600
601         for (i = 0; uev->envp[i] != NULL; i++) {
602                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
603                         p = uev->envp[i] + 6;
604                         major = strtoul(p, &q, 10);
605                         if (p == q) {
606                                 condlog(2, "invalid major '%s'", p);
607                                 major = -1;
608                         }
609                         break;
610                 }
611         }
612         return major;
613 }
614
615 extern int
616 uevent_get_minor(struct uevent *uev)
617 {
618         char *p, *q;
619         int i, minor = -1;
620
621         for (i = 0; uev->envp[i] != NULL; i++) {
622                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
623                         p = uev->envp[i] + 6;
624                         minor = strtoul(p, &q, 10);
625                         if (p == q) {
626                                 condlog(2, "invalid minor '%s'", p);
627                                 minor = -1;
628                         }
629                         break;
630                 }
631         }
632         return minor;
633 }
634
635 extern int
636 uevent_get_disk_ro(struct uevent *uev)
637 {
638         char *p, *q;
639         int i, ro = -1;
640
641         for (i = 0; uev->envp[i] != NULL; i++) {
642                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
643                         p = uev->envp[i] + 8;
644                         ro = strtoul(p, &q, 10);
645                         if (p == q) {
646                                 condlog(2, "invalid read_only setting '%s'", p);
647                                 ro = -1;
648                         }
649                         break;
650                 }
651         }
652         return ro;
653 }
654
655 extern char *
656 uevent_get_dm_name(struct uevent *uev)
657 {
658         char *p = NULL;
659         int i;
660
661         for (i = 0; uev->envp[i] != NULL; i++) {
662                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
663                     strlen(uev->envp[i]) > 7) {
664                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
665                         strcpy(p, uev->envp[i] + 8);
666                         break;
667                 }
668         }
669         return p;
670 }