Imported Upstream version 0.8.8
[platform/upstream/multipath-tools.git] / libmultipath / uevent.c
1 /*
2  * uevent.c - trigger upon netlink uevents from the kernel
3  *
4  *      Only kernels from version 2.6.10* on provide the uevent netlink socket.
5  *      Until the libc-kernel-headers are updated, you need to compile with:
6  *
7  *        gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c
8  *
9  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
10  *
11  *      This program is free software; you can redistribute it and/or modify it
12  *      under the terms of the GNU General Public License as published by the
13  *      Free Software Foundation version 2 of the License.
14  *
15  *      This program is distributed in the hope that it will be useful, but
16  *      WITHOUT ANY WARRANTY; without even the implied warranty of
17  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *      General Public License for more details.
19  *
20  *      You should have received a copy of the GNU General Public License along
21  *      with this program.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  */
24
#include <unistd.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include <fcntl.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/user.h>
#include <sys/un.h>
#include <poll.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <libudev.h>
#include <errno.h>
45
46 #include "debug.h"
47 #include "list.h"
48 #include "uevent.h"
49 #include "vector.h"
50 #include "structs.h"
51 #include "util.h"
52 #include "config.h"
53 #include "blacklist.h"
54 #include "devmapper.h"
55
56 #define MAX_ACCUMULATION_COUNT 2048
57 #define MAX_ACCUMULATION_TIME 30*1000
58 #define MIN_BURST_SPEED 10
59
typedef int (uev_trigger)(struct uevent *, void * trigger_data);

/* Queue of uevents waiting to be serviced by the dispatcher thread. */
static LIST_HEAD(uevq);
/* Protects uevq and servicing_uev. */
static pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t *uevq_lockp = &uevq_lock;
/* Signaled by the listener thread after appending events to uevq. */
static pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t *uev_condp = &uev_cond;
/* Callback and opaque data registered via uevent_dispatch(). */
static uev_trigger *my_uev_trigger;
static void *my_trigger_data;
/* Nonzero while the dispatcher is processing a batch of events. */
static int servicing_uev;
70
71 int is_uevent_busy(void)
72 {
73         int empty;
74
75         pthread_mutex_lock(uevq_lockp);
76         empty = list_empty(&uevq);
77         pthread_mutex_unlock(uevq_lockp);
78         return (!empty || servicing_uev);
79 }
80
81 struct uevent * alloc_uevent (void)
82 {
83         struct uevent *uev = calloc(1, sizeof(struct uevent));
84
85         if (uev) {
86                 INIT_LIST_HEAD(&uev->node);
87                 INIT_LIST_HEAD(&uev->merge_node);
88         }
89
90         return uev;
91 }
92
static void uevq_cleanup(struct list_head *tmpq);

/*
 * Free one uevent: first free any events merged onto it, then drop
 * its udev device reference and free the uevent itself.  Also used
 * as a pthread cancellation cleanup handler (hence the void * arg).
 */
static void cleanup_uev(void *arg)
{
	struct uevent *uev = arg;

	uevq_cleanup(&uev->merge_node);
	if (uev->udev)
		udev_device_unref(uev->udev);
	free(uev);
}
104
/* Remove and free every uevent on "tmpq". */
static void uevq_cleanup(struct list_head *tmpq)
{
	struct uevent *uev, *tmp;

	list_for_each_entry_safe(uev, tmp, tmpq, node) {
		list_del_init(&uev->node);
		cleanup_uev(uev);
	}
}
114
115 static const char* uevent_get_env_var(const struct uevent *uev,
116                                       const char *attr)
117 {
118         int i;
119         size_t len;
120         const char *p = NULL;
121
122         if (attr == NULL)
123                 goto invalid;
124
125         len = strlen(attr);
126         if (len == 0)
127                 goto invalid;
128
129         for (i = 0; uev->envp[i] != NULL; i++) {
130                 const char *var = uev->envp[i];
131
132                 if (strlen(var) > len &&
133                     !memcmp(var, attr, len) && var[len] == '=') {
134                         p = var + len + 1;
135                         break;
136                 }
137         }
138
139         condlog(4, "%s: %s -> '%s'", __func__, attr, p ?: "(null)");
140         return p;
141
142 invalid:
143         condlog(2, "%s: empty variable name", __func__);
144         return NULL;
145 }
146
/*
 * Parse the uevent environment variable "attr" as a non-negative
 * decimal integer.
 *
 * Returns the parsed value, or -1 if the variable is missing, empty,
 * contains trailing garbage, is negative, or does not fit in an int.
 */
int uevent_get_env_positive_int(const struct uevent *uev,
				       const char *attr)
{
	const char *p = uevent_get_env_var(uev, attr);
	char *q;
	long ret;

	if (p == NULL || *p == '\0')
		return -1;

	/*
	 * Use strtol with explicit errno/range checking: the previous
	 * strtoul() call truncated its unsigned long result into an
	 * int, so huge inputs could silently wrap to bogus values.
	 */
	errno = 0;
	ret = strtol(p, &q, 10);
	if (*q != '\0' || errno == ERANGE || ret < 0 || ret > INT_MAX) {
		condlog(2, "%s: invalid %s: '%s'", __func__, attr, p);
		return -1;
	}
	return (int)ret;
}
164
/*
 * Set uev->wwid from the uevent environment variable that the
 * configuration's uid_attrs designates for this kernel device.
 * Leaves uev->wwid untouched when the variable is absent.
 */
void
uevent_get_wwid(struct uevent *uev)
{
	char *uid_attribute;
	const char *val;
	struct config * conf;

	/* cleanup handler releases the config if we are cancelled */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel);
	pthread_cleanup_pop(1);

	/* wwid points into the uevent's own envp buffer, not a copy */
	val = uevent_get_env_var(uev, uid_attribute);
	if (val)
		uev->wwid = val;
}
181
182 static bool uevent_need_merge(void)
183 {
184         struct config * conf;
185         bool need_merge = false;
186
187         conf = get_multipath_config();
188         if (VECTOR_SIZE(&conf->uid_attrs) > 0)
189                 need_merge = true;
190         put_multipath_config(conf);
191
192         return need_merge;
193 }
194
195 static bool uevent_can_discard(struct uevent *uev)
196 {
197         int invalid = 0;
198         struct config * conf;
199
200         /*
201          * do not filter dm devices by devnode
202          */
203         if (!strncmp(uev->kernel, "dm-", 3))
204                 return false;
205         /*
206          * filter paths devices by devnode
207          */
208         conf = get_multipath_config();
209         pthread_cleanup_push(put_multipath_config, conf);
210         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
211                            uev->kernel) > 0)
212                 invalid = 1;
213         pthread_cleanup_pop(1);
214
215         if (invalid)
216                 return true;
217         return false;
218 }
219
220 static bool
221 uevent_can_filter(struct uevent *earlier, struct uevent *later)
222 {
223
224         /*
225          * filter earlier uvents if path has removed later. Eg:
226          * "add path1 |chang path1 |add path2 |remove path1"
227          * can filter as:
228          * "add path2 |remove path1"
229          * uevents "add path1" and "chang path1" are filtered out
230          */
231         if (!strcmp(earlier->kernel, later->kernel) &&
232                 !strcmp(later->action, "remove") &&
233                 strncmp(later->kernel, "dm-", 3)) {
234                 return true;
235         }
236
237         /*
238          * filter change uvents if add uevents exist. Eg:
239          * "change path1| add path1 |add path2"
240          * can filter as:
241          * "add path1 |add path2"
242          * uevent "chang path1" is filtered out
243          */
244         if (!strcmp(earlier->kernel, later->kernel) &&
245                 !strcmp(earlier->action, "change") &&
246                 !strcmp(later->action, "add") &&
247                 strncmp(later->kernel, "dm-", 3)) {
248                 return true;
249         }
250
251         return false;
252 }
253
254 static bool
255 merge_need_stop(struct uevent *earlier, struct uevent *later)
256 {
257         /*
258          * dm uevent do not try to merge with left uevents
259          */
260         if (!strncmp(later->kernel, "dm-", 3))
261                 return true;
262
263         /*
264          * we can not make a jugement without wwid,
265          * so it is sensible to stop merging
266          */
267         if (!earlier->wwid || !later->wwid)
268                 return true;
269         /*
270          * uevents merging stopped
271          * when we meet an opposite action uevent from the same LUN to AVOID
272          * "add path1 |remove path1 |add path2 |remove path2 |add path3"
273          * to merge as "remove path1, path2" and "add path1, path2, path3"
274          * OR
275          * "remove path1 |add path1 |remove path2 |add path2 |remove path3"
276          * to merge as "add path1, path2" and "remove path1, path2, path3"
277          * SO
278          * when we meet a non-change uevent from the same LUN
279          * with the same wwid and different action
280          * it would be better to stop merging.
281          */
282         if (!strcmp(earlier->wwid, later->wwid) &&
283             strcmp(earlier->action, later->action) &&
284             strcmp(earlier->action, "change") &&
285             strcmp(later->action, "change"))
286                 return true;
287
288         return false;
289 }
290
291 static bool
292 uevent_can_merge(struct uevent *earlier, struct uevent *later)
293 {
294         /* merge paths uevents
295          * whose wwids exsit and are same
296          * and actions are same,
297          * and actions are addition or deletion
298          */
299         if (earlier->wwid && later->wwid &&
300             !strcmp(earlier->wwid, later->wwid) &&
301             !strcmp(earlier->action, later->action) &&
302             strncmp(earlier->action, "change", 6) &&
303             strncmp(earlier->kernel, "dm-", 3)) {
304                 return true;
305         }
306
307         return false;
308 }
309
/*
 * First pass over a freshly received batch: free events that the
 * devnode blacklist discards, and (when merging is enabled) resolve
 * the wwid of each remaining non-dm event so the merge pass can
 * compare events by LUN.
 */
static void
uevent_prepare(struct list_head *tmpq)
{
	struct uevent *uev, *tmp;

	list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
		if (uevent_can_discard(uev)) {
			list_del_init(&uev->node);
			if (uev->udev)
				udev_device_unref(uev->udev);
			free(uev);
			continue;
		}

		if (strncmp(uev->kernel, "dm-", 3) &&
		    uevent_need_merge())
			uevent_get_wwid(uev);
	}
}
329
/*
 * Walk the queue backwards from "later", freeing every earlier
 * uevent that uevent_can_filter() deems redundant given "later".
 */
static void
uevent_filter(struct uevent *later, struct list_head *tmpq)
{
	struct uevent *earlier, *tmp;

	list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
		/*
		 * filter unnecessary earlier uevents
		 * by the later uevent
		 */
		if (uevent_can_filter(earlier, later)) {
			condlog(3, "uevent: %s-%s has filtered by uevent: %s-%s",
				earlier->kernel, earlier->action,
				later->kernel, later->action);

			list_del_init(&earlier->node);
			if (earlier->udev)
				udev_device_unref(earlier->udev);
			free(earlier);
		}
	}
}
352
/*
 * Walk the queue backwards from "later", moving mergeable same-LUN
 * events onto later->merge_node, stopping early when
 * merge_need_stop() forbids scanning further back.
 */
static void
uevent_merge(struct uevent *later, struct list_head *tmpq)
{
	struct uevent *earlier, *tmp;

	list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
		if (merge_need_stop(earlier, later))
			break;
		/*
		 * merge earlier uevents to the later uevent
		 */
		if (uevent_can_merge(earlier, later)) {
			condlog(3, "merged uevent: %s-%s-%s with uevent: %s-%s-%s",
				earlier->action, earlier->kernel, earlier->wwid,
				later->action, later->kernel, later->wwid);

			list_move(&earlier->node, &later->merge_node);
		}
	}
}
373
374 static void
375 merge_uevq(struct list_head *tmpq)
376 {
377         struct uevent *later;
378
379         uevent_prepare(tmpq);
380         list_for_each_entry_reverse(later, tmpq, node) {
381                 uevent_filter(later, tmpq);
382                 if(uevent_need_merge())
383                         uevent_merge(later, tmpq);
384         }
385 }
386
/*
 * Pop each uevent off "tmpq" and hand it to the registered trigger
 * callback.  The cleanup handler frees the uevent (including any
 * events merged onto it) even if the thread is cancelled inside the
 * callback.
 */
static void
service_uevq(struct list_head *tmpq)
{
	struct uevent *uev, *tmp;

	list_for_each_entry_safe(uev, tmp, tmpq, node) {
		list_del_init(&uev->node);

		pthread_cleanup_push(cleanup_uev, uev);
		if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
			condlog(0, "uevent trigger error");
		pthread_cleanup_pop(1);
	}
}
401
/*
 * Cancellation cleanup handler: drop the reference that
 * uevent_listen() took on the udev context.
 */
static void uevent_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_listen() resources");
	udev_unref((struct udev *)arg);
}
409
/*
 * Cancellation cleanup handler: release the udev monitor created by
 * uevent_listen().
 */
static void monitor_cleanup(void *arg)
{
	condlog(3, "Releasing uevent_monitor() resources");
	udev_monitor_unref((struct udev_monitor *)arg);
}
417
/* Cancellation cleanup handler: free every uevent on the given queue. */
static void cleanup_uevq(void *arg)
{
	uevq_cleanup(arg);
}
422
/* Cancellation cleanup handler: empty the global uevent queue. */
static void cleanup_global_uevq(void *arg __attribute__((unused)))
{
	pthread_mutex_lock(uevq_lockp);
	uevq_cleanup(&uevq);
	pthread_mutex_unlock(uevq_lockp);
}
429
/*
 * Service the uevent queue.
 *
 * Registers the trigger callback, then loops: wait (under the lock)
 * for the listener thread to queue events, splice the whole queue
 * onto a private list, and merge/service that list outside the lock.
 * Cleanup handlers release the mutex and free any pending events if
 * the thread is cancelled mid-batch.  Returns 0 when it exits
 * because uev_trigger was NULL at wakeup.
 */
int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
		    void * trigger_data)
{
	my_uev_trigger = uev_trigger;
	my_trigger_data = trigger_data;

	/* avoid page faults while handling storage events */
	mlockall(MCL_CURRENT | MCL_FUTURE);

	while (1) {
		LIST_HEAD(uevq_tmp);

		pthread_cleanup_push(cleanup_mutex, uevq_lockp);
		pthread_mutex_lock(uevq_lockp);
		servicing_uev = 0;
		/*
		 * Condition signals are unreliable,
		 * so make sure we only wait if we have to.
		 */
		if (list_empty(&uevq)) {
			pthread_cond_wait(uev_condp, uevq_lockp);
		}
		servicing_uev = 1;
		/* take the whole queue; service it without holding the lock */
		list_splice_init(&uevq, &uevq_tmp);
		pthread_cleanup_pop(1);

		if (!my_uev_trigger)
			break;

		pthread_cleanup_push(cleanup_uevq, &uevq_tmp);
		merge_uevq(&uevq_tmp);
		service_uevq(&uevq_tmp);
		pthread_cleanup_pop(1);
	}
	condlog(3, "Terminating uev service queue");
	return 0;
}
469
/*
 * Build a struct uevent from a udev device.
 *
 * Copies every udev property as a "NAME=value" string into the
 * uevent's internal buffer and points uev->envp[] at the copies.
 * On success the uevent takes ownership of "dev" (stored in
 * uev->udev); on any failure "dev" is unreferenced and NULL is
 * returned.  devpath/action/kernel/wwid all alias the internal
 * buffer, so they live exactly as long as the uevent.
 */
static struct uevent *uevent_from_udev_device(struct udev_device *dev)
{
	struct uevent *uev;
	int i = 0;
	char *pos, *end;
	struct udev_list_entry *list_entry;

	uev = alloc_uevent();
	if (!uev) {
		udev_device_unref(dev);
		condlog(1, "lost uevent, oom");
		return NULL;
	}
	pos = uev->buffer;
	end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1;
	udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) {
		const char *name, *value;
		int bytes;

		name = udev_list_entry_get_name(list_entry);
		if (!name)
			name = "(null)";
		value = udev_list_entry_get_value(list_entry);
		if (!value)
			value = "(null)";
		/*
		 * snprintf returns the would-be length; a value
		 * reaching "end" means the entry was truncated, so
		 * stop copying properties at that point.
		 */
		bytes = snprintf(pos, end - pos, "%s=%s", name, value);
		if (pos + bytes >= end) {
			condlog(2, "buffer overflow for uevent");
			break;
		}
		uev->envp[i] = pos;
		pos += bytes;
		*pos = '\0';
		pos++;
		/* +8 / +7 skip the "DEVPATH=" / "ACTION=" prefixes */
		if (strcmp(name, "DEVPATH") == 0)
			uev->devpath = uev->envp[i] + 8;
		if (strcmp(name, "ACTION") == 0)
			uev->action = uev->envp[i] + 7;
		i++;
		if (i == HOTPLUG_NUM_ENVP - 1)
			break;
	}
	if (!uev->devpath || ! uev->action) {
		udev_device_unref(dev);
		condlog(1, "uevent missing necessary fields");
		free(uev);
		return NULL;
	}
	uev->udev = dev;
	uev->envp[i] = NULL;

	condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath);
	/* kernel name is the last component of DEVPATH */
	uev->kernel = strrchr(uev->devpath, '/');
	if (uev->kernel)
		uev->kernel++;

	/* print payload environment */
	for (i = 0; uev->envp[i] != NULL; i++)
		condlog(5, "%s", uev->envp[i]);
	return uev;
}
531
532 static bool uevent_burst(struct timeval *start_time, int events)
533 {
534         struct timeval diff_time, end_time;
535         unsigned long speed;
536         unsigned long eclipse_ms;
537
538         if(events > MAX_ACCUMULATION_COUNT) {
539                 condlog(2, "burst got %u uevents, too much uevents, stopped", events);
540                 return false;
541         }
542
543         gettimeofday(&end_time, NULL);
544         timersub(&end_time, start_time, &diff_time);
545
546         eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
547
548         if (eclipse_ms == 0)
549                 return true;
550
551         if (eclipse_ms > MAX_ACCUMULATION_TIME) {
552                 condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
553                 return false;
554         }
555
556         speed = (events * 1000) / eclipse_ms;
557         if (speed > MIN_BURST_SPEED)
558                 return true;
559
560         return false;
561 }
562
/*
 * Listener thread main loop: receive block/disk uevents from a udev
 * netlink monitor, accumulate them on a local list while events keep
 * bursting in, then splice the batch onto the global queue and wake
 * the dispatcher.  Returns non-zero on setup failure or a negative
 * errno if poll() fails; otherwise only exits via cancellation.
 */
int uevent_listen(struct udev *udev)
{
	int err = 2;
	struct udev_monitor *monitor = NULL;
	int fd, socket_flags, events;
	struct timeval start_time;
	int timeout = 30;
	LIST_HEAD(uevlisten_tmp);

	/*
	 * Queue uevents for service by dedicated thread so that the uevent
	 * listening thread does not block on multipathd locks (vecs->lock)
	 * thereby not getting to empty the socket's receive buffer queue
	 * often enough.
	 */
	if (!udev) {
		condlog(1, "no udev context");
		return 1;
	}
	udev_ref(udev);
	pthread_cleanup_push(uevent_cleanup, udev);

	monitor = udev_monitor_new_from_netlink(udev, "udev");
	if (!monitor) {
		condlog(2, "failed to create udev monitor");
		goto out_udev;
	}
	pthread_cleanup_push(monitor_cleanup, monitor);
#ifdef LIBUDEV_API_RECVBUF
	if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024) < 0)
		condlog(2, "failed to increase buffer size");
#endif
	fd = udev_monitor_get_fd(monitor);
	if (fd < 0) {
		condlog(2, "failed to get monitor fd");
		goto out;
	}
	/* switch the monitor socket to blocking mode */
	socket_flags = fcntl(fd, F_GETFL);
	if (socket_flags < 0) {
		condlog(2, "failed to get monitor socket flags : %s",
			strerror(errno));
		goto out;
	}
	if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) {
		condlog(2, "failed to set monitor socket flags : %s",
			strerror(errno));
		goto out;
	}
	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
							      "disk");
	if (err)
		condlog(2, "failed to create filter : %s", strerror(-err));
	err = udev_monitor_enable_receiving(monitor);
	if (err) {
		condlog(2, "failed to enable receiving : %s", strerror(-err));
		goto out;
	}

	events = 0;
	gettimeofday(&start_time, NULL);
	pthread_cleanup_push(cleanup_global_uevq, NULL);
	pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp);
	while (1) {
		struct uevent *uev;
		struct udev_device *dev;
		struct pollfd ev_poll;
		int poll_timeout;
		int fdcount;

		memset(&ev_poll, 0, sizeof(struct pollfd));
		ev_poll.fd = fd;
		ev_poll.events = POLLIN;
		poll_timeout = timeout * 1000;
		errno = 0;
		fdcount = poll(&ev_poll, 1, poll_timeout);
		if (fdcount > 0 && ev_poll.revents & POLLIN) {
			/*
			 * While bursting, poll with a 1s timeout to
			 * keep accumulating; otherwise 0 so the batch
			 * is forwarded as soon as the socket is empty.
			 */
			timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
			dev = udev_monitor_receive_device(monitor);
			if (!dev) {
				condlog(0, "failed getting udev device");
				continue;
			}
			uev = uevent_from_udev_device(dev);
			if (!uev)
				continue;
			list_add_tail(&uev->node, &uevlisten_tmp);
			events++;
			continue;
		}
		if (fdcount < 0) {
			if (errno == EINTR)
				continue;

			condlog(0, "error receiving "
				"uevent message: %m");
			err = -errno;
			break;
		}
		/* poll timed out: burst is over, hand off the batch */
		if (!list_empty(&uevlisten_tmp)) {
			/*
			 * Queue uevents and poke service pthread.
			 */
			condlog(3, "Forwarding %d uevents", events);
			pthread_mutex_lock(uevq_lockp);
			list_splice_tail_init(&uevlisten_tmp, &uevq);
			pthread_cond_signal(uev_condp);
			pthread_mutex_unlock(uevq_lockp);
			events = 0;
		}
		gettimeofday(&start_time, NULL);
		timeout = 30;
	}
	pthread_cleanup_pop(1);
	pthread_cleanup_pop(1);
out:
	pthread_cleanup_pop(1);
out_udev:
	pthread_cleanup_pop(1);
	return err;
}
683
/*
 * Return a heap-allocated copy of the uevent environment variable
 * "attr", or NULL if it is absent.  The caller must free the result.
 */
char *uevent_get_dm_str(const struct uevent *uev, char *attr)
{
	const char *val = uevent_get_env_var(uev, attr);

	return val ? strdup(val) : NULL;
}
692
693 bool uevent_is_mpath(const struct uevent *uev)
694 {
695         const char *uuid = uevent_get_env_var(uev, "DM_UUID");
696
697         if (uuid == NULL)
698                 return false;
699         if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN))
700                 return false;
701         return uuid[UUID_PREFIX_LEN] != '\0';
702 }