Imported Upstream version 0.6.4
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <fcntl.h>
32 #include <time.h>
33 #include <sys/socket.h>
34 #include <sys/user.h>
35 #include <sys/un.h>
36 #include <poll.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <sys/mman.h>
41 #include <libudev.h>
42 #include <errno.h>
43
44 #include "memory.h"
45 #include "debug.h"
46 #include "list.h"
47 #include "uevent.h"
48 #include "vector.h"
49
/* Signature of the callback invoked for every queued uevent. */
typedef int (uev_trigger)(struct uevent *, void *trigger_data);

/* Queue of received uevents waiting to be serviced. */
LIST_HEAD(uevq);

/* Lock taken around every access to uevq in this file. */
pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t *uevq_lockp = &uevq_lock;

/* Signalled by the listeners after they append events to uevq. */
pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t *uev_condp = &uev_cond;

/* Callback and its opaque argument, installed by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void *my_trigger_data;

/* Set to 1 by uevent_dispatch() while it handles a batch, 0 while it waits. */
int servicing_uev;
60
61 int is_uevent_busy(void)
62 {
63         int empty;
64
65         pthread_mutex_lock(uevq_lockp);
66         empty = list_empty(&uevq);
67         pthread_mutex_unlock(uevq_lockp);
68         return (!empty || servicing_uev);
69 }
70
71 struct uevent * alloc_uevent (void)
72 {
73         struct uevent *uev = MALLOC(sizeof(struct uevent));
74
75         if (uev)
76                 INIT_LIST_HEAD(&uev->node);
77
78         return uev;
79 }
80
81 void
82 service_uevq(struct list_head *tmpq)
83 {
84         struct uevent *uev, *tmp;
85
86         list_for_each_entry_safe(uev, tmp, tmpq, node) {
87                 list_del_init(&uev->node);
88
89                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
90                         condlog(0, "uevent trigger error");
91
92                 if (uev->udev)
93                         udev_device_unref(uev->udev);
94                 FREE(uev);
95         }
96 }
97
/*
 * Thread cleanup handler registered by uevent_listen().
 *
 * arg is the udev context on which uevent_listen() took a reference;
 * that reference is dropped here.
 */
static void uevent_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_listen() resources");
	udev_unref((struct udev *)arg);
}
105
106 void
107 uevq_cleanup(struct list_head *tmpq)
108 {
109         struct uevent *uev, *tmp;
110
111         list_for_each_entry_safe(uev, tmp, tmpq, node) {
112                 list_del_init(&uev->node);
113                 FREE(uev);
114         }
115 }
116
117 /*
118  * Service the uevent queue.
119  */
120 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
121                     void * trigger_data)
122 {
123         my_uev_trigger = uev_trigger;
124         my_trigger_data = trigger_data;
125
126         mlockall(MCL_CURRENT | MCL_FUTURE);
127
128         while (1) {
129                 LIST_HEAD(uevq_tmp);
130
131                 pthread_mutex_lock(uevq_lockp);
132                 servicing_uev = 0;
133                 /*
134                  * Condition signals are unreliable,
135                  * so make sure we only wait if we have to.
136                  */
137                 if (list_empty(&uevq)) {
138                         pthread_cond_wait(uev_condp, uevq_lockp);
139                 }
140                 servicing_uev = 1;
141                 list_splice_init(&uevq, &uevq_tmp);
142                 pthread_mutex_unlock(uevq_lockp);
143                 if (!my_uev_trigger)
144                         break;
145                 service_uevq(&uevq_tmp);
146         }
147         condlog(3, "Terminating uev service queue");
148         uevq_cleanup(&uevq);
149         return 0;
150 }
151
152 struct uevent *uevent_from_buffer(char *buf, ssize_t buflen)
153 {
154         struct uevent *uev;
155         char *buffer;
156         size_t bufpos;
157         int i;
158         char *pos;
159
160         uev = alloc_uevent();
161         if (!uev) {
162                 condlog(1, "lost uevent, oom");
163                 return NULL;
164         }
165
166         if ((size_t)buflen > sizeof(buf)-1)
167                 buflen = sizeof(buf)-1;
168
169         /*
170          * Copy the shared receive buffer contents to buffer private
171          * to this uevent so we can immediately reuse the shared buffer.
172          */
173         memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
174         buffer = uev->buffer;
175         buffer[buflen] = '\0';
176
177         /* save start of payload */
178         bufpos = strlen(buffer) + 1;
179
180         /* action string */
181         uev->action = buffer;
182         pos = strchr(buffer, '@');
183         if (!pos) {
184                 condlog(3, "bad action string '%s'", buffer);
185                 FREE(uev);
186                 return NULL;
187         }
188         pos[0] = '\0';
189
190         /* sysfs path */
191         uev->devpath = &pos[1];
192
193         /* hotplug events have the environment attached - reconstruct envp[] */
194         for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
195                 int keylen;
196                 char *key;
197
198                 key = &buffer[bufpos];
199                 keylen = strlen(key);
200                 uev->envp[i] = key;
201                 /* Filter out sequence number */
202                 if (strncmp(key, "SEQNUM=", 7) == 0) {
203                         char *eptr;
204
205                         uev->seqnum = strtoul(key + 7, &eptr, 10);
206                         if (eptr == key + 7)
207                                 uev->seqnum = -1;
208                 }
209                 bufpos += keylen + 1;
210         }
211         uev->envp[i] = NULL;
212
213         condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum,
214                 uev->action, uev->devpath);
215         uev->kernel = strrchr(uev->devpath, '/');
216         if (uev->kernel)
217                 uev->kernel++;
218
219         /* print payload environment */
220         for (i = 0; uev->envp[i] != NULL; i++)
221                 condlog(5, "%s", uev->envp[i]);
222
223         return uev;
224 }
225
226 int failback_listen(void)
227 {
228         int sock;
229         struct sockaddr_nl snl;
230         struct sockaddr_un sun;
231         socklen_t addrlen;
232         int retval;
233         int rcvbufsz = 128*1024;
234         int rcvsz = 0;
235         int rcvszsz = sizeof(rcvsz);
236         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
237         const int feature_on = 1;
238         /*
239          * First check whether we have a udev socket
240          */
241         memset(&sun, 0x00, sizeof(struct sockaddr_un));
242         sun.sun_family = AF_LOCAL;
243         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
244         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
245
246         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
247         if (sock >= 0) {
248
249                 condlog(3, "reading events from udev socket.");
250
251                 /* the bind takes care of ensuring only one copy running */
252                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
253                 if (retval < 0) {
254                         condlog(0, "bind failed, exit");
255                         goto exit;
256                 }
257
258                 /* enable receiving of the sender credentials */
259                 retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
260                                     &feature_on, sizeof(feature_on));
261                 if (retval < 0) {
262                         condlog(0, "failed to enable credential passing, exit");
263                         goto exit;
264                 }
265
266         } else {
267                 /* Fallback to read kernel netlink events */
268                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
269                 snl.nl_family = AF_NETLINK;
270                 snl.nl_pid = getpid();
271                 snl.nl_groups = 0x01;
272
273                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
274                 if (sock == -1) {
275                         condlog(0, "error getting socket, exit");
276                         return 1;
277                 }
278
279                 condlog(3, "reading events from kernel.");
280
281                 /*
282                  * try to avoid dropping uevents, even so, this is not a guarantee,
283                  * but it does help to change the netlink uevent socket's
284                  * receive buffer threshold from the default value of 106,496 to
285                  * the maximum value of 262,142.
286                  */
287                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
288                                     sizeof(rcvbufsz));
289
290                 if (retval < 0) {
291                         condlog(0, "error setting receive buffer size for socket, exit");
292                         exit(1);
293                 }
294                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
295                 if (retval < 0) {
296                         condlog(0, "error setting receive buffer size for socket, exit");
297                         exit(1);
298                 }
299                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
300
301                 /* enable receiving of the sender credentials */
302                 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
303                                &feature_on, sizeof(feature_on)) < 0) {
304                         condlog(0, "error on enabling credential passing for socket");
305                         exit(1);
306                 }
307
308                 retval = bind(sock, (struct sockaddr *) &snl,
309                               sizeof(struct sockaddr_nl));
310                 if (retval < 0) {
311                         condlog(0, "bind failed, exit");
312                         goto exit;
313                 }
314         }
315
316         while (1) {
317                 size_t bufpos;
318                 ssize_t buflen;
319                 struct uevent *uev;
320                 struct msghdr smsg;
321                 struct iovec iov;
322                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
323                 struct cmsghdr *cmsg;
324                 struct ucred *cred;
325                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
326
327                 memset(buf, 0x00, sizeof(buf));
328                 iov.iov_base = &buf;
329                 iov.iov_len = sizeof(buf);
330                 memset (&smsg, 0x00, sizeof(struct msghdr));
331                 smsg.msg_iov = &iov;
332                 smsg.msg_iovlen = 1;
333                 smsg.msg_control = cred_msg;
334                 smsg.msg_controllen = sizeof(cred_msg);
335
336                 buflen = recvmsg(sock, &smsg, 0);
337                 if (buflen < 0) {
338                         if (errno != EINTR)
339                                 condlog(0, "error receiving message, errno %d", errno);
340                         continue;
341                 }
342
343                 cmsg = CMSG_FIRSTHDR(&smsg);
344                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
345                         condlog(3, "no sender credentials received, message ignored");
346                         continue;
347                 }
348
349                 cred = (struct ucred *)CMSG_DATA(cmsg);
350                 if (cred->uid != 0) {
351                         condlog(3, "sender uid=%d, message ignored", cred->uid);
352                         continue;
353                 }
354
355                 /* skip header */
356                 bufpos = strlen(buf) + 1;
357                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
358                         condlog(3, "invalid message length");
359                         continue;
360                 }
361
362                 /* check message header */
363                 if (strstr(buf, "@/") == NULL) {
364                         condlog(3, "unrecognized message header");
365                         continue;
366                 }
367                 if ((size_t)buflen > sizeof(buf)-1) {
368                         condlog(2, "buffer overflow for received uevent");
369                         buflen = sizeof(buf)-1;
370                 }
371
372                 uev = uevent_from_buffer(buf, buflen);
373                 if (!uev)
374                         continue;
375                 /*
376                  * Queue uevent and poke service pthread.
377                  */
378                 pthread_mutex_lock(uevq_lockp);
379                 list_add_tail(&uev->node, &uevq);
380                 pthread_cond_signal(uev_condp);
381                 pthread_mutex_unlock(uevq_lockp);
382         }
383
384 exit:
385         close(sock);
386         return 1;
387 }
388
389 struct uevent *uevent_from_udev_device(struct udev_device *dev)
390 {
391         struct uevent *uev;
392         int i = 0;
393         char *pos, *end;
394         struct udev_list_entry *list_entry;
395
396         uev = alloc_uevent();
397         if (!uev) {
398                 udev_device_unref(dev);
399                 condlog(1, "lost uevent, oom");
400                 return NULL;
401         }
402         pos = uev->buffer;
403         end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
404         udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
405                 const char *name, *value;
406                 int bytes;
407
408                 name = udev_list_entry_get_name(list_entry);
409                 if (!name)
410                         name = "(null)";
411                 value = udev_list_entry_get_value(list_entry);
412                 if (!value)
413                         value = "(null)";
414                 bytes = snprintf(pos, end - pos, "%s=%s", name, value);
415                 if (pos + bytes >= end) {
416                         condlog(2, "buffer overflow for uevent");
417                         break;
418                 }
419                 uev->envp[i] = pos;
420                 pos += bytes;
421                 *pos = '\0';
422                 pos++;
423                 if (strcmp(name, "DEVPATH") == 0)
424                         uev->devpath = uev->envp[i] + 8;
425                 if (strcmp(name, "ACTION") == 0)
426                         uev->action = uev->envp[i] + 7;
427                 i++;
428                 if (i == HOTPLUG_NUM_ENVP - 1)
429                         break;
430         }
431         uev->udev = dev;
432         uev->envp[i] = NULL;
433
434         condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
435         uev->kernel = strrchr(uev->devpath, '/');
436         if (uev->kernel)
437                 uev->kernel++;
438
439         /* print payload environment */
440         for (i = 0; uev->envp[i] != NULL; i++)
441                 condlog(5, "%s", uev->envp[i]);
442         return uev;
443 }
444
445 int uevent_listen(struct udev *udev)
446 {
447         int err = 2;
448         struct udev_monitor *monitor = NULL;
449         int fd, socket_flags, events;
450         int need_failback = 1;
451         int timeout = 30;
452         LIST_HEAD(uevlisten_tmp);
453
454         /*
455          * Queue uevents for service by dedicated thread so that the uevent
456          * listening thread does not block on multipathd locks (vecs->lock)
457          * thereby not getting to empty the socket's receive buffer queue
458          * often enough.
459          */
460         if (!udev) {
461                 condlog(1, "no udev context");
462                 return 1;
463         }
464         udev_ref(udev);
465         pthread_cleanup_push(uevent_cleanup, udev);
466
467         monitor = udev_monitor_new_from_netlink(udev, "udev");
468         if (!monitor) {
469                 condlog(2, "failed to create udev monitor");
470                 goto out;
471         }
472 #ifdef LIBUDEV_API_RECVBUF
473         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
474                 condlog(2, "failed to increase buffer size");
475 #endif
476         fd = udev_monitor_get_fd(monitor);
477         if (fd < 0) {
478                 condlog(2, "failed to get monitor fd");
479                 goto out;
480         }
481         socket_flags = fcntl(fd, F_GETFL);
482         if (socket_flags < 0) {
483                 condlog(2, "failed to get monitor socket flags : %s",
484                         strerror(errno));
485                 goto out;
486         }
487         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
488                 condlog(2, "failed to set monitor socket flags : %s",
489                         strerror(errno));
490                 goto out;
491         }
492         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
493                                                               NULL);
494         if (err)
495                 condlog(2, "failed to create filter : %s", strerror(-err));
496         err = udev_monitor_enable_receiving(monitor);
497         if (err) {
498                 condlog(2, "failed to enable receiving : %s", strerror(-err));
499                 goto out;
500         }
501
502         events = 0;
503         while (1) {
504                 struct uevent *uev;
505                 struct udev_device *dev;
506                 struct pollfd ev_poll;
507                 int poll_timeout;
508                 int fdcount;
509
510                 memset(&ev_poll, 0, sizeof(struct pollfd));
511                 ev_poll.fd = fd;
512                 ev_poll.events = POLLIN;
513                 poll_timeout = timeout * 1000;
514                 errno = 0;
515                 fdcount = poll(&ev_poll, 1, poll_timeout);
516                 if (fdcount && ev_poll.revents & POLLIN) {
517                         timeout = 0;
518                         dev = udev_monitor_receive_device(monitor);
519                         if (!dev) {
520                                 condlog(0, "failed getting udev device");
521                                 continue;
522                         }
523                         uev = uevent_from_udev_device(dev);
524                         if (!uev)
525                                 continue;
526                         list_add_tail(&uev->node, &uevlisten_tmp);
527                         events++;
528                         continue;
529                 }
530                 if (fdcount < 0) {
531                         if (errno == EINTR)
532                                 continue;
533
534                         condlog(0, "error receiving "
535                                 "uevent message: %m");
536                         err = -errno;
537                         break;
538                 }
539                 if (!list_empty(&uevlisten_tmp)) {
540                         /*
541                          * Queue uevents and poke service pthread.
542                          */
543                         condlog(3, "Forwarding %d uevents", events);
544                         pthread_mutex_lock(uevq_lockp);
545                         list_splice_tail_init(&uevlisten_tmp, &uevq);
546                         pthread_cond_signal(uev_condp);
547                         pthread_mutex_unlock(uevq_lockp);
548                         events = 0;
549                 }
550                 timeout = 30;
551         }
552         need_failback = 0;
553 out:
554         if (monitor)
555                 udev_monitor_unref(monitor);
556         if (need_failback)
557                 err = failback_listen();
558         pthread_cleanup_pop(1);
559         return err;
560 }
561
562 extern int
563 uevent_get_major(struct uevent *uev)
564 {
565         char *p, *q;
566         int i, major = -1;
567
568         for (i = 0; uev->envp[i] != NULL; i++) {
569                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
570                         p = uev->envp[i] + 6;
571                         major = strtoul(p, &q, 10);
572                         if (p == q) {
573                                 condlog(2, "invalid major '%s'", p);
574                                 major = -1;
575                         }
576                         break;
577                 }
578         }
579         return major;
580 }
581
582 extern int
583 uevent_get_minor(struct uevent *uev)
584 {
585         char *p, *q;
586         int i, minor = -1;
587
588         for (i = 0; uev->envp[i] != NULL; i++) {
589                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
590                         p = uev->envp[i] + 6;
591                         minor = strtoul(p, &q, 10);
592                         if (p == q) {
593                                 condlog(2, "invalid minor '%s'", p);
594                                 minor = -1;
595                         }
596                         break;
597                 }
598         }
599         return minor;
600 }
601
602 extern int
603 uevent_get_disk_ro(struct uevent *uev)
604 {
605         char *p, *q;
606         int i, ro = -1;
607
608         for (i = 0; uev->envp[i] != NULL; i++) {
609                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
610                         p = uev->envp[i] + 8;
611                         ro = strtoul(p, &q, 10);
612                         if (p == q) {
613                                 condlog(2, "invalid read_only setting '%s'", p);
614                                 ro = -1;
615                         }
616                         break;
617                 }
618         }
619         return ro;
620 }
621
622 extern char *
623 uevent_get_dm_name(struct uevent *uev)
624 {
625         char *p = NULL;
626         int i;
627
628         for (i = 0; uev->envp[i] != NULL; i++) {
629                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
630                     strlen(uev->envp[i]) > 7) {
631                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
632                         strcpy(p, uev->envp[i] + 8);
633                         break;
634                 }
635         }
636         return p;
637 }