multipath: enable getting uevents through libudev
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42 #include <libudev.h>
43 #include <errno.h>
44
45 #include "memory.h"
46 #include "debug.h"
47 #include "list.h"
48 #include "uevent.h"
49
/* Callback type invoked for each dequeued uevent. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Thread handle; not referenced in the visible part of this file. */
pthread_t uevq_thr;

/* Queue of received uevents waiting to be serviced. */
LIST_HEAD(uevq);

/* Lock protecting uevq, and the pointer through which it is always used. */
pthread_mutex_t uevq_lock;
pthread_mutex_t *uevq_lockp = &uevq_lock;

/* Condition signalled when an event is queued or the queue is stopped. */
pthread_cond_t uev_cond;
pthread_cond_t *uev_condp = &uev_cond;

/* Callback and opaque argument installed by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void * my_trigger_data;

/* Set to 1 by uevent_dispatch() while it holds a batch, 0 before waiting. */
int servicing_uev;
59
60 int is_uevent_busy(void)
61 {
62         int empty;
63
64         pthread_mutex_lock(uevq_lockp);
65         empty = list_empty(&uevq);
66         pthread_mutex_unlock(uevq_lockp);
67         return (!empty || servicing_uev);
68 }
69
70 struct uevent * alloc_uevent (void)
71 {
72         struct uevent *uev = MALLOC(sizeof(struct uevent));
73
74         if (uev)
75                 INIT_LIST_HEAD(&uev->node);
76
77         return uev;
78 }
79
/*
 * Initialise a pthread attribute object.
 *
 * attr      - attribute object to initialise
 * stacksize - requested stack size; raised to PTHREAD_STACK_MIN if smaller
 * detached  - non-zero to create threads in the detached state
 *
 * Any failure is reported on stderr and terminates the process with
 * exit(1).
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
        int rc;

        /*
         * The pthread_attr_* functions return the error number directly
         * and do not set errno, so the return value is what must be
         * passed to strerror().
         */
        rc = pthread_attr_init(attr);
        if (rc) {
                fprintf(stderr, "can't initialize thread attr: %s\n",
                        strerror(rc));
                exit(1);
        }
        if (stacksize < PTHREAD_STACK_MIN)
                stacksize = PTHREAD_STACK_MIN;

        rc = pthread_attr_setstacksize(attr, stacksize);
        if (rc) {
                fprintf(stderr, "can't set thread stack size to %lu: %s\n",
                        (unsigned long)stacksize, strerror(rc));
                exit(1);
        }
        if (detached) {
                rc = pthread_attr_setdetachstate(attr,
                                                 PTHREAD_CREATE_DETACHED);
                if (rc) {
                        fprintf(stderr, "can't set thread to detached: %s\n",
                                strerror(rc));
                        exit(1);
                }
        }
}
103
104 /*
105  * Called with uevq_lockp held
106  */
107 void
108 service_uevq(struct list_head *tmpq)
109 {
110         struct uevent *uev, *tmp;
111
112         list_for_each_entry_safe(uev, tmp, tmpq, node) {
113                 list_del_init(&uev->node);
114
115                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
116                         condlog(0, "uevent trigger error");
117
118                 FREE(uev);
119         }
120 }
121
122 static void uevq_stop(void *arg)
123 {
124         condlog(3, "Stopping uev queue");
125         pthread_mutex_lock(uevq_lockp);
126         my_uev_trigger = NULL;
127         pthread_cond_signal(uev_condp);
128         pthread_mutex_unlock(uevq_lockp);
129 }
130
131 /*
132  * Service the uevent queue.
133  */
134 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
135                     void * trigger_data)
136 {
137         my_uev_trigger = uev_trigger;
138         my_trigger_data = trigger_data;
139
140         mlockall(MCL_CURRENT | MCL_FUTURE);
141
142         while (1) {
143                 LIST_HEAD(uevq_tmp);
144
145                 pthread_mutex_lock(uevq_lockp);
146                 servicing_uev = 0;
147                 /*
148                  * Condition signals are unreliable,
149                  * so make sure we only wait if we have to.
150                  */
151                 if (list_empty(&uevq)) {
152                         pthread_cond_wait(uev_condp, uevq_lockp);
153                 }
154                 servicing_uev = 1;
155                 list_splice_init(&uevq, &uevq_tmp);
156                 pthread_mutex_unlock(uevq_lockp);
157                 if (!my_uev_trigger)
158                         break;
159                 service_uevq(&uevq_tmp);
160         }
161         condlog(3, "Terminating uev service queue");
162         return 0;
163 }
164
165 int failback_listen(void)
166 {
167         int sock;
168         struct sockaddr_nl snl;
169         struct sockaddr_un sun;
170         socklen_t addrlen;
171         int retval;
172         int rcvbufsz = 128*1024;
173         int rcvsz = 0;
174         int rcvszsz = sizeof(rcvsz);
175         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
176         const int feature_on = 1;
177         /*
178          * First check whether we have a udev socket
179          */
180         memset(&sun, 0x00, sizeof(struct sockaddr_un));
181         sun.sun_family = AF_LOCAL;
182         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
183         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
184
185         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
186         if (sock >= 0) {
187
188                 condlog(3, "reading events from udev socket.");
189
190                 /* the bind takes care of ensuring only one copy running */
191                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
192                 if (retval < 0) {
193                         condlog(0, "bind failed, exit");
194                         goto exit;
195                 }
196
197                 /* enable receiving of the sender credentials */
198                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
199                            &feature_on, sizeof(feature_on));
200
201         } else {
202                 /* Fallback to read kernel netlink events */
203                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
204                 snl.nl_family = AF_NETLINK;
205                 snl.nl_pid = getpid();
206                 snl.nl_groups = 0x01;
207
208                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
209                 if (sock == -1) {
210                         condlog(0, "error getting socket, exit");
211                         return 1;
212                 }
213
214                 condlog(3, "reading events from kernel.");
215
216                 /*
217                  * try to avoid dropping uevents, even so, this is not a guarantee,
218                  * but it does help to change the netlink uevent socket's
219                  * receive buffer threshold from the default value of 106,496 to
220                  * the maximum value of 262,142.
221                  */
222                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
223                                     sizeof(rcvbufsz));
224
225                 if (retval < 0) {
226                         condlog(0, "error setting receive buffer size for socket, exit");
227                         exit(1);
228                 }
229                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
230                 if (retval < 0) {
231                         condlog(0, "error setting receive buffer size for socket, exit");
232                         exit(1);
233                 }
234                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
235
236                 /* enable receiving of the sender credentials */
237                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
238                            &feature_on, sizeof(feature_on));
239
240                 retval = bind(sock, (struct sockaddr *) &snl,
241                               sizeof(struct sockaddr_nl));
242                 if (retval < 0) {
243                         condlog(0, "bind failed, exit");
244                         goto exit;
245                 }
246         }
247
248         while (1) {
249                 int i;
250                 char *pos;
251                 size_t bufpos;
252                 ssize_t buflen;
253                 struct uevent *uev;
254                 char *buffer;
255                 struct msghdr smsg;
256                 struct iovec iov;
257                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
258                 struct cmsghdr *cmsg;
259                 struct ucred *cred;
260                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
261
262                 memset(buf, 0x00, sizeof(buf));
263                 iov.iov_base = &buf;
264                 iov.iov_len = sizeof(buf);
265                 memset (&smsg, 0x00, sizeof(struct msghdr));
266                 smsg.msg_iov = &iov;
267                 smsg.msg_iovlen = 1;
268                 smsg.msg_control = cred_msg;
269                 smsg.msg_controllen = sizeof(cred_msg);
270
271                 buflen = recvmsg(sock, &smsg, 0);
272                 if (buflen < 0) {
273                         if (errno != EINTR)
274                                 condlog(0, "error receiving message, errno %d", errno);
275                         continue;
276                 }
277
278                 cmsg = CMSG_FIRSTHDR(&smsg);
279                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
280                         condlog(3, "no sender credentials received, message ignored");
281                         continue;
282                 }
283
284                 cred = (struct ucred *)CMSG_DATA(cmsg);
285                 if (cred->uid != 0) {
286                         condlog(3, "sender uid=%d, message ignored", cred->uid);
287                         continue;
288                 }
289
290                 /* skip header */
291                 bufpos = strlen(buf) + 1;
292                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
293                         condlog(3, "invalid message length");
294                         continue;
295                 }
296
297                 /* check message header */
298                 if (strstr(buf, "@/") == NULL) {
299                         condlog(3, "unrecognized message header");
300                         continue;
301                 }
302                 if ((size_t)buflen > sizeof(buf)-1) {
303                         condlog(2, "buffer overflow for received uevent");
304                         buflen = sizeof(buf)-1;
305                 }
306
307                 uev = alloc_uevent();
308
309                 if (!uev) {
310                         condlog(1, "lost uevent, oom");
311                         continue;
312                 }
313
314                 if ((size_t)buflen > sizeof(buf)-1)
315                         buflen = sizeof(buf)-1;
316
317                 /*
318                  * Copy the shared receive buffer contents to buffer private
319                  * to this uevent so we can immediately reuse the shared buffer.
320                  */
321                 memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
322                 buffer = uev->buffer;
323                 buffer[buflen] = '\0';
324
325                 /* save start of payload */
326                 bufpos = strlen(buffer) + 1;
327
328                 /* action string */
329                 uev->action = buffer;
330                 pos = strchr(buffer, '@');
331                 if (!pos) {
332                         condlog(3, "bad action string '%s'", buffer);
333                         continue;
334                 }
335                 pos[0] = '\0';
336
337                 /* sysfs path */
338                 uev->devpath = &pos[1];
339
340                 /* hotplug events have the environment attached - reconstruct envp[] */
341                 for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
342                         int keylen;
343                         char *key;
344
345                         key = &buffer[bufpos];
346                         keylen = strlen(key);
347                         uev->envp[i] = key;
348                         bufpos += keylen + 1;
349                 }
350                 uev->envp[i] = NULL;
351
352                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
353                 uev->kernel = strrchr(uev->devpath, '/');
354                 if (uev->kernel)
355                         uev->kernel++;
356
357                 /* print payload environment */
358                 for (i = 0; uev->envp[i] != NULL; i++)
359                         condlog(5, "%s", uev->envp[i]);
360
361                 /*
362                  * Queue uevent and poke service pthread.
363                  */
364                 pthread_mutex_lock(uevq_lockp);
365                 list_add_tail(&uev->node, &uevq);
366                 pthread_cond_signal(uev_condp);
367                 pthread_mutex_unlock(uevq_lockp);
368         }
369
370 exit:
371         close(sock);
372         return 1;
373 }
374
375 int uevent_listen(void)
376 {
377         int err;
378         struct udev *udev = NULL;
379         struct udev_monitor *monitor = NULL;
380         int fd, socket_flags;
381         int need_failback = 1;
382         /*
383          * Queue uevents for service by dedicated thread so that the uevent
384          * listening thread does not block on multipathd locks (vecs->lock)
385          * thereby not getting to empty the socket's receive buffer queue
386          * often enough.
387          */
388         INIT_LIST_HEAD(&uevq);
389
390         pthread_mutex_init(uevq_lockp, NULL);
391         pthread_cond_init(uev_condp, NULL);
392         pthread_cleanup_push(uevq_stop, NULL);
393
394         udev = udev_new();
395         if (!udev) {
396                 condlog(2, "failed to create udev context");
397                 goto out;
398         }
399         monitor = udev_monitor_new_from_netlink(udev, "udev");
400         if (!monitor) {
401                 condlog(2, "failed to create udev monitor");
402                 goto out;
403         }
404         if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024))
405                 condlog(2, "failed to increase buffer size");
406         fd = udev_monitor_get_fd(monitor);
407         if (fd < 0) {
408                 condlog(2, "failed to get monitor fd");
409                 goto out;
410         }
411         socket_flags = fcntl(fd, F_GETFL);
412         if (socket_flags < 0) {
413                 condlog(2, "failed to get monitor socket flags : %s",
414                         strerror(errno));
415                 goto out;
416         }
417         if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
418                 condlog(2, "failed to set monitor socket flags : %s",
419                         strerror(errno));
420                 goto out;
421         }
422         err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
423                                                               NULL);
424         if (err)
425                 condlog(2, "failed to create filter : %s\n", strerror(-err));
426         err = udev_monitor_enable_receiving(monitor);
427         if (err) {
428                 condlog(2, "failed to enable receiving : %s\n", strerror(-err));
429                 goto out;
430         }
431         while (1) {
432                 int i = 0;
433                 char *pos, *end;
434                 struct uevent *uev;
435                 struct udev_device *dev;
436                 struct udev_list_entry *list_entry;
437
438                 dev = udev_monitor_receive_device(monitor);
439                 if (!dev) {
440                         condlog(0, "failed getting udev device");
441                         continue;
442                 }
443
444                 uev = alloc_uevent();
445                 if (!uev) {
446                         condlog(1, "lost uevent, oom");
447                         continue;
448                 }
449                 pos = uev->buffer;
450                 end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
451                 udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
452                         const char *name, *value;
453                         int bytes;
454
455                         name = udev_list_entry_get_name(list_entry);
456                         if (!name)
457                                 name = "(null)";
458                         value = udev_list_entry_get_value(list_entry);
459                         if (!value)
460                                 value = "(null)";
461                         bytes = snprintf(pos, end - pos, "%s=%s", name,
462                                         value);
463                         if (pos + bytes >= end) {
464                                 condlog(2, "buffer overflow for uevent");
465                                 break;
466                         }
467                         uev->envp[i] = pos;
468                         pos += bytes;
469                         *pos = '\0';
470                         pos++;
471                         if (strcmp(name, "DEVPATH") == 0)
472                                 uev->devpath = uev->envp[i] + 8;
473                         if (strcmp(name, "ACTION") == 0)
474                                 uev->action = uev->envp[i] + 7;
475                         i++;
476                         if (i == HOTPLUG_NUM_ENVP - 1)
477                                 break;
478                 }
479                 udev_device_unref(dev);
480                 uev->envp[i] = NULL;
481
482                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
483                 uev->kernel = strrchr(uev->devpath, '/');
484                 if (uev->kernel)
485                         uev->kernel++;
486
487                 /* print payload environment */
488                 for (i = 0; uev->envp[i] != NULL; i++)
489                         condlog(5, "%s", uev->envp[i]);
490
491                 /*
492                  * Queue uevent and poke service pthread.
493                  */
494                 pthread_mutex_lock(uevq_lockp);
495                 list_add_tail(&uev->node, &uevq);
496                 pthread_cond_signal(uev_condp);
497                 pthread_mutex_unlock(uevq_lockp);
498         }
499         need_failback = 0;
500 out:
501         if (monitor)
502                 udev_monitor_unref(monitor);
503         if (udev)
504                 udev_unref(udev);
505         if (need_failback)
506                 err = failback_listen();
507         pthread_cleanup_pop(1);
508         pthread_mutex_destroy(uevq_lockp);
509         pthread_cond_destroy(uev_condp);
510         return err;
511 }
512
513 extern int
514 uevent_get_major(struct uevent *uev)
515 {
516         char *p, *q;
517         int i, major = -1;
518
519         for (i = 0; uev->envp[i] != NULL; i++) {
520                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
521                         p = uev->envp[i] + 6;
522                         major = strtoul(p, &q, 10);
523                         if (p == q) {
524                                 condlog(2, "invalid major '%s'", p);
525                                 major = -1;
526                         }
527                         break;
528                 }
529         }
530         return major;
531 }
532
533 extern int
534 uevent_get_minor(struct uevent *uev)
535 {
536         char *p, *q;
537         int i, minor = -1;
538
539         for (i = 0; uev->envp[i] != NULL; i++) {
540                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
541                         p = uev->envp[i] + 6;
542                         minor = strtoul(p, &q, 10);
543                         if (p == q) {
544                                 condlog(2, "invalid minor '%s'", p);
545                                 minor = -1;
546                         }
547                         break;
548                 }
549         }
550         return minor;
551 }
552
553 extern int
554 uevent_get_disk_ro(struct uevent *uev)
555 {
556         char *p, *q;
557         int i, ro = -1;
558
559         for (i = 0; uev->envp[i] != NULL; i++) {
560                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
561                         p = uev->envp[i] + 8;
562                         ro = strtoul(p, &q, 10);
563                         if (p == q) {
564                                 condlog(2, "invalid read_only setting '%s'", p);
565                                 ro = -1;
566                         }
567                         break;
568                 }
569         }
570         return ro;
571 }
572
573 extern char *
574 uevent_get_dm_name(struct uevent *uev)
575 {
576         char *p = NULL;
577         int i;
578
579         for (i = 0; uev->envp[i] != NULL; i++) {
580                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
581                     strlen(uev->envp[i]) > 7) {
582                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
583                         strcpy(p, uev->envp[i] + 8);
584                         break;
585                 }
586         }
587         return p;
588 }