Merge remote-tracking branch 'hannes/for-christophe'
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program; if not, write to the Free Software Foundation, Inc.,
22  *      675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <sys/socket.h>
35 #include <sys/user.h>
36 #include <sys/un.h>
37 #include <linux/types.h>
38 #include <linux/netlink.h>
39 #include <pthread.h>
40 #include <limits.h>
41 #include <sys/mman.h>
42 #include <errno.h>
43
44 #include "memory.h"
45 #include "debug.h"
46 #include "list.h"
47 #include "uevent.h"
48
/* Signature of the callback run for each uevent taken off the queue. */
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Thread handle; not referenced elsewhere in this file. */
pthread_t uevq_thr;

/* Queue of received uevents waiting to be serviced. */
LIST_HEAD(uevq);

/* Mutex protecting uevq, plus the pointer through which it is used. */
pthread_mutex_t uevq_lock;
pthread_mutex_t *uevq_lockp = &uevq_lock;

/* Condition signalled when uevq gains an entry or the queue is stopped. */
pthread_cond_t uev_cond;
pthread_cond_t *uev_condp = &uev_cond;

/* Callback and its opaque argument, installed by uevent_dispatch(). */
uev_trigger *my_uev_trigger;
void * my_trigger_data;

/* Set to 1 by uevent_dispatch() while a batch is being serviced. */
int servicing_uev;
58
59 int is_uevent_busy(void)
60 {
61         int empty;
62
63         pthread_mutex_lock(uevq_lockp);
64         empty = list_empty(&uevq);
65         pthread_mutex_unlock(uevq_lockp);
66         return (!empty || servicing_uev);
67 }
68
69 struct uevent * alloc_uevent (void)
70 {
71         struct uevent *uev = MALLOC(sizeof(struct uevent));
72
73         if (uev)
74                 INIT_LIST_HEAD(&uev->node);
75
76         return uev;
77 }
78
/*
 * Initialise a thread attribute object.
 *
 * attr:      attribute object to initialise
 * stacksize: requested stack size; raised to PTHREAD_STACK_MIN if smaller
 * detached:  non-zero to create the thread in detached state
 *
 * Any failure is reported on stderr and terminates the process with
 * exit status 1.
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
        int rc;

        /*
         * The pthread_attr_* functions return the error number directly
         * and do not set errno, so report their return value.
         */
        rc = pthread_attr_init(attr);
        if (rc) {
                fprintf(stderr, "can't initialize thread attr: %s\n",
                        strerror(rc));
                exit(1);
        }
        if (stacksize < (size_t)PTHREAD_STACK_MIN)
                stacksize = (size_t)PTHREAD_STACK_MIN;

        rc = pthread_attr_setstacksize(attr, stacksize);
        if (rc) {
                fprintf(stderr, "can't set thread stack size to %lu: %s\n",
                        (unsigned long)stacksize, strerror(rc));
                exit(1);
        }
        if (detached) {
                rc = pthread_attr_setdetachstate(attr,
                                                 PTHREAD_CREATE_DETACHED);
                if (rc) {
                        fprintf(stderr, "can't set thread to detached: %s\n",
                                strerror(rc));
                        exit(1);
                }
        }
}
102
103 /*
104  * Called with uevq_lockp held
105  */
106 void
107 service_uevq(struct list_head *tmpq)
108 {
109         struct uevent *uev, *tmp;
110
111         list_for_each_entry_safe(uev, tmp, tmpq, node) {
112                 list_del_init(&uev->node);
113
114                 if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
115                         condlog(0, "uevent trigger error");
116
117                 FREE(uev);
118         }
119 }
120
121 static void uevq_stop(void *arg)
122 {
123         condlog(3, "Stopping uev queue");
124         pthread_mutex_lock(uevq_lockp);
125         my_uev_trigger = NULL;
126         pthread_cond_signal(uev_condp);
127         pthread_mutex_unlock(uevq_lockp);
128 }
129
130 /*
131  * Service the uevent queue.
132  */
133 int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
134                     void * trigger_data)
135 {
136         my_uev_trigger = uev_trigger;
137         my_trigger_data = trigger_data;
138
139         mlockall(MCL_CURRENT | MCL_FUTURE);
140
141         while (1) {
142                 LIST_HEAD(uevq_tmp);
143
144                 pthread_mutex_lock(uevq_lockp);
145                 servicing_uev = 0;
146                 /*
147                  * Condition signals are unreliable,
148                  * so make sure we only wait if we have to.
149                  */
150                 if (list_empty(&uevq)) {
151                         pthread_cond_wait(uev_condp, uevq_lockp);
152                 }
153                 servicing_uev = 1;
154                 list_splice_init(&uevq, &uevq_tmp);
155                 pthread_mutex_unlock(uevq_lockp);
156                 if (!my_uev_trigger)
157                         break;
158                 service_uevq(&uevq_tmp);
159         }
160         condlog(3, "Terminating uev service queue");
161         return 0;
162 }
163
164 int uevent_listen(void)
165 {
166         int sock;
167         struct sockaddr_nl snl;
168         struct sockaddr_un sun;
169         socklen_t addrlen;
170         int retval;
171         int rcvbufsz = 128*1024;
172         int rcvsz = 0;
173         int rcvszsz = sizeof(rcvsz);
174         unsigned int *prcvszsz = (unsigned int *)&rcvszsz;
175         const int feature_on = 1;
176
177         /*
178          * Queue uevents for service by dedicated thread so that the uevent
179          * listening thread does not block on multipathd locks (vecs->lock)
180          * thereby not getting to empty the socket's receive buffer queue
181          * often enough.
182          */
183         INIT_LIST_HEAD(&uevq);
184
185         pthread_mutex_init(uevq_lockp, NULL);
186         pthread_cond_init(uev_condp, NULL);
187
188         pthread_cleanup_push(uevq_stop, NULL);
189
190         /*
191          * First check whether we have a udev socket
192          */
193         memset(&sun, 0x00, sizeof(struct sockaddr_un));
194         sun.sun_family = AF_LOCAL;
195         strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event");
196         addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1;
197
198         sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
199         if (sock >= 0) {
200
201                 condlog(3, "reading events from udev socket.");
202
203                 /* the bind takes care of ensuring only one copy running */
204                 retval = bind(sock, (struct sockaddr *) &sun, addrlen);
205                 if (retval < 0) {
206                         condlog(0, "bind failed, exit");
207                         goto exit;
208                 }
209
210                 /* enable receiving of the sender credentials */
211                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
212                            &feature_on, sizeof(feature_on));
213
214         } else {
215                 /* Fallback to read kernel netlink events */
216                 memset(&snl, 0x00, sizeof(struct sockaddr_nl));
217                 snl.nl_family = AF_NETLINK;
218                 snl.nl_pid = getpid();
219                 snl.nl_groups = 0x01;
220
221                 sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
222                 if (sock == -1) {
223                         condlog(0, "error getting socket, exit");
224                         return 1;
225                 }
226
227                 condlog(3, "reading events from kernel.");
228
229                 /*
230                  * try to avoid dropping uevents, even so, this is not a guarantee,
231                  * but it does help to change the netlink uevent socket's
232                  * receive buffer threshold from the default value of 106,496 to
233                  * the maximum value of 262,142.
234                  */
235                 retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz,
236                                     sizeof(rcvbufsz));
237
238                 if (retval < 0) {
239                         condlog(0, "error setting receive buffer size for socket, exit");
240                         exit(1);
241                 }
242                 retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz);
243                 if (retval < 0) {
244                         condlog(0, "error setting receive buffer size for socket, exit");
245                         exit(1);
246                 }
247                 condlog(3, "receive buffer size for socket is %u.", rcvsz);
248
249                 /* enable receiving of the sender credentials */
250                 setsockopt(sock, SOL_SOCKET, SO_PASSCRED,
251                            &feature_on, sizeof(feature_on));
252
253                 retval = bind(sock, (struct sockaddr *) &snl,
254                               sizeof(struct sockaddr_nl));
255                 if (retval < 0) {
256                         condlog(0, "bind failed, exit");
257                         goto exit;
258                 }
259         }
260
261         while (1) {
262                 int i;
263                 char *pos;
264                 size_t bufpos;
265                 ssize_t buflen;
266                 struct uevent *uev;
267                 char *buffer;
268                 struct msghdr smsg;
269                 struct iovec iov;
270                 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
271                 struct cmsghdr *cmsg;
272                 struct ucred *cred;
273                 static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE];
274
275                 memset(buf, 0x00, sizeof(buf));
276                 iov.iov_base = &buf;
277                 iov.iov_len = sizeof(buf);
278                 memset (&smsg, 0x00, sizeof(struct msghdr));
279                 smsg.msg_iov = &iov;
280                 smsg.msg_iovlen = 1;
281                 smsg.msg_control = cred_msg;
282                 smsg.msg_controllen = sizeof(cred_msg);
283
284                 buflen = recvmsg(sock, &smsg, 0);
285                 if (buflen < 0) {
286                         if (errno != EINTR)
287                                 condlog(0, "error receiving message, errno %d", errno);
288                         continue;
289                 }
290
291                 cmsg = CMSG_FIRSTHDR(&smsg);
292                 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
293                         condlog(3, "no sender credentials received, message ignored");
294                         continue;
295                 }
296
297                 cred = (struct ucred *)CMSG_DATA(cmsg);
298                 if (cred->uid != 0) {
299                         condlog(3, "sender uid=%d, message ignored", cred->uid);
300                         continue;
301                 }
302
303                 /* skip header */
304                 bufpos = strlen(buf) + 1;
305                 if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) {
306                         condlog(3, "invalid message length");
307                         continue;
308                 }
309
310                 /* check message header */
311                 if (strstr(buf, "@/") == NULL) {
312                         condlog(3, "unrecognized message header");
313                         continue;
314                 }
315                 if ((size_t)buflen > sizeof(buf)-1) {
316                         condlog(2, "buffer overflow for received uevent");
317                         buflen = sizeof(buf)-1;
318                 }
319
320                 uev = alloc_uevent();
321
322                 if (!uev) {
323                         condlog(1, "lost uevent, oom");
324                         continue;
325                 }
326
327                 if ((size_t)buflen > sizeof(buf)-1)
328                         buflen = sizeof(buf)-1;
329
330                 /*
331                  * Copy the shared receive buffer contents to buffer private
332                  * to this uevent so we can immediately reuse the shared buffer.
333                  */
334                 memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE);
335                 buffer = uev->buffer;
336                 buffer[buflen] = '\0';
337
338                 /* save start of payload */
339                 bufpos = strlen(buffer) + 1;
340
341                 /* action string */
342                 uev->action = buffer;
343                 pos = strchr(buffer, '@');
344                 if (!pos) {
345                         condlog(3, "bad action string '%s'", buffer);
346                         continue;
347                 }
348                 pos[0] = '\0';
349
350                 /* sysfs path */
351                 uev->devpath = &pos[1];
352
353                 /* hotplug events have the environment attached - reconstruct envp[] */
354                 for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) {
355                         int keylen;
356                         char *key;
357
358                         key = &buffer[bufpos];
359                         keylen = strlen(key);
360                         uev->envp[i] = key;
361                         bufpos += keylen + 1;
362                 }
363                 uev->envp[i] = NULL;
364
365                 condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
366                 uev->kernel = strrchr(uev->devpath, '/');
367                 if (uev->kernel)
368                         uev->kernel++;
369
370                 /* print payload environment */
371                 for (i = 0; uev->envp[i] != NULL; i++)
372                         condlog(5, "%s", uev->envp[i]);
373
374                 /*
375                  * Queue uevent and poke service pthread.
376                  */
377                 pthread_mutex_lock(uevq_lockp);
378                 list_add_tail(&uev->node, &uevq);
379                 pthread_cond_signal(uev_condp);
380                 pthread_mutex_unlock(uevq_lockp);
381         }
382
383 exit:
384         close(sock);
385
386         pthread_cleanup_pop(1);
387
388         pthread_mutex_destroy(uevq_lockp);
389         pthread_cond_destroy(uev_condp);
390
391         return 1;
392 }
393
394 extern int
395 uevent_get_major(struct uevent *uev)
396 {
397         char *p, *q;
398         int i, major = -1;
399
400         for (i = 0; uev->envp[i] != NULL; i++) {
401                 if (!strncmp(uev->envp[i], "MAJOR", 5) && strlen(uev->envp[i]) > 6) {
402                         p = uev->envp[i] + 6;
403                         major = strtoul(p, &q, 10);
404                         if (p == q) {
405                                 condlog(2, "invalid major '%s'", p);
406                                 major = -1;
407                         }
408                         break;
409                 }
410         }
411         return major;
412 }
413
414 extern int
415 uevent_get_minor(struct uevent *uev)
416 {
417         char *p, *q;
418         int i, minor = -1;
419
420         for (i = 0; uev->envp[i] != NULL; i++) {
421                 if (!strncmp(uev->envp[i], "MINOR", 5) && strlen(uev->envp[i]) > 6) {
422                         p = uev->envp[i] + 6;
423                         minor = strtoul(p, &q, 10);
424                         if (p == q) {
425                                 condlog(2, "invalid minor '%s'", p);
426                                 minor = -1;
427                         }
428                         break;
429                 }
430         }
431         return minor;
432 }
433
434 extern int
435 uevent_get_disk_ro(struct uevent *uev)
436 {
437         char *p, *q;
438         int i, ro = -1;
439
440         for (i = 0; uev->envp[i] != NULL; i++) {
441                 if (!strncmp(uev->envp[i], "DISK_RO", 6) && strlen(uev->envp[i]) > 7) {
442                         p = uev->envp[i] + 8;
443                         ro = strtoul(p, &q, 10);
444                         if (p == q) {
445                                 condlog(2, "invalid read_only setting '%s'", p);
446                                 ro = -1;
447                         }
448                         break;
449                 }
450         }
451         return ro;
452 }
453
454 extern char *
455 uevent_get_dm_name(struct uevent *uev)
456 {
457         char *p = NULL;
458         int i;
459
460         for (i = 0; uev->envp[i] != NULL; i++) {
461                 if (!strncmp(uev->envp[i], "DM_NAME", 6) &&
462                     strlen(uev->envp[i]) > 7) {
463                         p = MALLOC(strlen(uev->envp[i] + 8) + 1);
464                         strcpy(p, uev->envp[i] + 8);
465                         break;
466                 }
467         }
468         return p;
469 }